i965: remove trailing spaces in various files
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "intel_mipmap_tree.h"
43 #include "intel_batchbuffer.h"
44 #include "intel_tex.h"
45 #include "intel_fbo.h"
46 #include "intel_buffer_objects.h"
47
48 #include "brw_context.h"
49 #include "brw_state.h"
50 #include "brw_defines.h"
51 #include "brw_wm.h"
52
53 GLuint
54 translate_tex_target(GLenum target)
55 {
56 switch (target) {
57 case GL_TEXTURE_1D:
58 case GL_TEXTURE_1D_ARRAY_EXT:
59 return BRW_SURFACE_1D;
60
61 case GL_TEXTURE_RECTANGLE_NV:
62 return BRW_SURFACE_2D;
63
64 case GL_TEXTURE_2D:
65 case GL_TEXTURE_2D_ARRAY_EXT:
66 case GL_TEXTURE_EXTERNAL_OES:
67 case GL_TEXTURE_2D_MULTISAMPLE:
68 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
69 return BRW_SURFACE_2D;
70
71 case GL_TEXTURE_3D:
72 return BRW_SURFACE_3D;
73
74 case GL_TEXTURE_CUBE_MAP:
75 case GL_TEXTURE_CUBE_MAP_ARRAY:
76 return BRW_SURFACE_CUBE;
77
78 default:
79 unreachable("not reached");
80 }
81 }
82
83 uint32_t
84 brw_get_surface_tiling_bits(uint32_t tiling)
85 {
86 switch (tiling) {
87 case I915_TILING_X:
88 return BRW_SURFACE_TILED;
89 case I915_TILING_Y:
90 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
91 default:
92 return 0;
93 }
94 }
95
96
97 uint32_t
98 brw_get_surface_num_multisamples(unsigned num_samples)
99 {
100 if (num_samples > 1)
101 return BRW_SURFACE_MULTISAMPLECOUNT_4;
102 else
103 return BRW_SURFACE_MULTISAMPLECOUNT_1;
104 }
105
106 void
107 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
108 bool is_render_target,
109 unsigned *width, unsigned *height,
110 unsigned *pitch, uint32_t *tiling, unsigned *format)
111 {
112 static const unsigned halign_stencil = 8;
113
114    /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
115 * there are half as many rows.
116 * In addition, mip-levels are accessed manually by the program and
117     * therefore the surface is set up to cover all the mip-levels for one slice.
118 * (Hardware is still used to access individual slices).
119 */
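   /* Illustrative example (hypothetical values): a stencil miptree with
    * pitch 64 and a 64x128 total size spanning two slices comes out below
    * as a Y-tiled surface with pitch 128, width 128 and height 32, i.e.
    * twice as wide and half as tall per slice.
    */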
120 *tiling = I915_TILING_Y;
121 *pitch = mt->pitch * 2;
122 *width = ALIGN(mt->total_width, halign_stencil) * 2;
123 *height = (mt->total_height / mt->physical_depth0) / 2;
124
125 if (is_render_target) {
126 *format = BRW_SURFACEFORMAT_R8_UINT;
127 }
128 }
129
130
131 /**
132 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
133 * swizzling.
134 */
135 int
136 brw_get_texture_swizzle(const struct gl_context *ctx,
137 const struct gl_texture_object *t)
138 {
139 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
140
141 int swizzles[SWIZZLE_NIL + 1] = {
142 SWIZZLE_X,
143 SWIZZLE_Y,
144 SWIZZLE_Z,
145 SWIZZLE_W,
146 SWIZZLE_ZERO,
147 SWIZZLE_ONE,
148 SWIZZLE_NIL
149 };
150
151 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
152 img->_BaseFormat == GL_DEPTH_STENCIL) {
153 GLenum depth_mode = t->DepthMode;
154
155 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
156 * with depth component data specified with a sized internal format.
157 * Otherwise, it's left at the old default, GL_LUMINANCE.
158 */
159 if (_mesa_is_gles3(ctx) &&
160 img->InternalFormat != GL_DEPTH_COMPONENT &&
161 img->InternalFormat != GL_DEPTH_STENCIL) {
162 depth_mode = GL_RED;
163 }
164
165 switch (depth_mode) {
166 case GL_ALPHA:
167 swizzles[0] = SWIZZLE_ZERO;
168 swizzles[1] = SWIZZLE_ZERO;
169 swizzles[2] = SWIZZLE_ZERO;
170 swizzles[3] = SWIZZLE_X;
171 break;
172 case GL_LUMINANCE:
173 swizzles[0] = SWIZZLE_X;
174 swizzles[1] = SWIZZLE_X;
175 swizzles[2] = SWIZZLE_X;
176 swizzles[3] = SWIZZLE_ONE;
177 break;
178 case GL_INTENSITY:
179 swizzles[0] = SWIZZLE_X;
180 swizzles[1] = SWIZZLE_X;
181 swizzles[2] = SWIZZLE_X;
182 swizzles[3] = SWIZZLE_X;
183 break;
184 case GL_RED:
185 swizzles[0] = SWIZZLE_X;
186 swizzles[1] = SWIZZLE_ZERO;
187 swizzles[2] = SWIZZLE_ZERO;
188 swizzles[3] = SWIZZLE_ONE;
189 break;
190 }
191 }
192
193 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
194
195 /* If the texture's format is alpha-only, force R, G, and B to
196 * 0.0. Similarly, if the texture's format has no alpha channel,
197 * force the alpha value read to 1.0. This allows for the
198 * implementation to use an RGBA texture for any of these formats
199 * without leaking any unexpected values.
200 */
201 switch (img->_BaseFormat) {
202 case GL_ALPHA:
203 swizzles[0] = SWIZZLE_ZERO;
204 swizzles[1] = SWIZZLE_ZERO;
205 swizzles[2] = SWIZZLE_ZERO;
206 break;
207 case GL_LUMINANCE:
208 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
209 swizzles[0] = SWIZZLE_X;
210 swizzles[1] = SWIZZLE_X;
211 swizzles[2] = SWIZZLE_X;
212 swizzles[3] = SWIZZLE_ONE;
213 }
214 break;
215 case GL_LUMINANCE_ALPHA:
216 if (datatype == GL_SIGNED_NORMALIZED) {
217 swizzles[0] = SWIZZLE_X;
218 swizzles[1] = SWIZZLE_X;
219 swizzles[2] = SWIZZLE_X;
220 swizzles[3] = SWIZZLE_W;
221 }
222 break;
223 case GL_INTENSITY:
224 if (datatype == GL_SIGNED_NORMALIZED) {
225 swizzles[0] = SWIZZLE_X;
226 swizzles[1] = SWIZZLE_X;
227 swizzles[2] = SWIZZLE_X;
228 swizzles[3] = SWIZZLE_X;
229 }
230 break;
231 case GL_RED:
232 case GL_RG:
233 case GL_RGB:
234 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
235 swizzles[3] = SWIZZLE_ONE;
236 break;
237 }
238
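   /* Compose the application-supplied texture swizzle (t->_Swizzle) with the
    * depth-mode/base-format swizzle derived above.
    */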
239 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
240 swizzles[GET_SWZ(t->_Swizzle, 1)],
241 swizzles[GET_SWZ(t->_Swizzle, 2)],
242 swizzles[GET_SWZ(t->_Swizzle, 3)]);
243 }
244
245 static void
246 gen4_emit_buffer_surface_state(struct brw_context *brw,
247 uint32_t *out_offset,
248 drm_intel_bo *bo,
249 unsigned buffer_offset,
250 unsigned surface_format,
251 unsigned buffer_size,
252 unsigned pitch,
253 bool rw)
254 {
255 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
256 6 * 4, 32, out_offset);
257 memset(surf, 0, 6 * 4);
258
259 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
260 surface_format << BRW_SURFACE_FORMAT_SHIFT |
261 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
262 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
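   /* The element count is split across the surface Width (bits 6:0),
    * Height (bits 19:7) and Depth (bits 26:20) fields below.
    */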
263 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
264 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
265 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
266 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
267
268 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
269 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
270 * physical cache. It is mapped in hardware to the sampler cache."
271 */
272 if (bo) {
273 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
274 bo, buffer_offset,
275 I915_GEM_DOMAIN_SAMPLER,
276 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
277 }
278 }
279
280 void
281 brw_update_buffer_texture_surface(struct gl_context *ctx,
282 unsigned unit,
283 uint32_t *surf_offset)
284 {
285 struct brw_context *brw = brw_context(ctx);
286 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
287 struct intel_buffer_object *intel_obj =
288 intel_buffer_object(tObj->BufferObject);
289 uint32_t size = tObj->BufferSize;
290 drm_intel_bo *bo = NULL;
291 mesa_format format = tObj->_BufferObjectFormat;
292 uint32_t brw_format = brw_format_for_mesa_format(format);
293 int texel_size = _mesa_get_format_bytes(format);
294
295 if (intel_obj) {
296 size = MIN2(size, intel_obj->Base.Size);
297 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
298 }
299
300 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
301 _mesa_problem(NULL, "bad format %s for texture buffer\n",
302 _mesa_get_format_name(format));
303 }
304
305 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
306 tObj->BufferOffset,
307 brw_format,
308 size / texel_size,
309 texel_size,
310 false /* rw */);
311 }
312
313 static void
314 brw_update_texture_surface(struct gl_context *ctx,
315 unsigned unit,
316 uint32_t *surf_offset,
317 bool for_gather)
318 {
319 struct brw_context *brw = brw_context(ctx);
320 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
321 struct intel_texture_object *intelObj = intel_texture_object(tObj);
322 struct intel_mipmap_tree *mt = intelObj->mt;
323 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
324 uint32_t *surf;
325
326 /* BRW_NEW_TEXTURE_BUFFER */
327 if (tObj->Target == GL_TEXTURE_BUFFER) {
328 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
329 return;
330 }
331
332 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
333 6 * 4, 32, surf_offset);
334
335 uint32_t tex_format = translate_tex_format(brw, mt->format,
336 sampler->sRGBDecode);
337
338 if (for_gather) {
339 /* Sandybridge's gather4 message is broken for integer formats.
340 * To work around this, we pretend the surface is UNORM for
341 * 8 or 16-bit formats, and emit shader instructions to recover
342 * the real INT/UINT value. For 32-bit formats, we pretend
343 * the surface is FLOAT, and simply reinterpret the resulting
344 * bits.
345 */
346 switch (tex_format) {
347 case BRW_SURFACEFORMAT_R8_SINT:
348 case BRW_SURFACEFORMAT_R8_UINT:
349 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
350 break;
351
352 case BRW_SURFACEFORMAT_R16_SINT:
353 case BRW_SURFACEFORMAT_R16_UINT:
354 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
355 break;
356
357 case BRW_SURFACEFORMAT_R32_SINT:
358 case BRW_SURFACEFORMAT_R32_UINT:
359 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
360 break;
361
362 default:
363 break;
364 }
365 }
366
367 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
368 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
369 BRW_SURFACE_CUBEFACE_ENABLES |
370 tex_format << BRW_SURFACE_FORMAT_SHIFT);
371
372 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
373
374 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
375 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
376 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
377
378 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
379 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
380 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
381
382 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
383 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
384
385 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
386
387 /* Emit relocation to surface contents */
388 drm_intel_bo_emit_reloc(brw->batch.bo,
389 *surf_offset + 4,
390 mt->bo,
391 surf[1] - mt->bo->offset64,
392 I915_GEM_DOMAIN_SAMPLER, 0);
393 }
394
395 /**
396 * Create the constant buffer surface. Vertex/fragment shader constants will be
397 * read from this buffer with Data Port Read instructions/messages.
398 */
399 void
400 brw_create_constant_surface(struct brw_context *brw,
401 drm_intel_bo *bo,
402 uint32_t offset,
403 uint32_t size,
404 uint32_t *out_offset,
405 bool dword_pitch)
406 {
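   /* dword_pitch selects a 4-byte element stride rather than the default
    * 16-byte (vec4) stride; elements is the buffer size in units of that
    * stride.
    */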
407 uint32_t stride = dword_pitch ? 4 : 16;
408 uint32_t elements = ALIGN(size, stride) / stride;
409
410 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
411 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
412 elements, stride, false);
413 }
414
415 /**
416 * Create the buffer surface. Shader buffer variables will be
417  * read from / written to this buffer with Data Port Read/Write
418 * instructions/messages.
419 */
420 void
421 brw_create_buffer_surface(struct brw_context *brw,
422 drm_intel_bo *bo,
423 uint32_t offset,
424 uint32_t size,
425 uint32_t *out_offset,
426 bool dword_pitch)
427 {
428 /* Use a raw surface so we can reuse existing untyped read/write/atomic
429 * messages. We need these specifically for the fragment shader since they
430     * include a pixel mask header that is needed to ensure correct behavior
431 * with helper invocations, which cannot write to the buffer.
432 */
433 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
434 BRW_SURFACEFORMAT_RAW,
435 size, 1, true);
436 }
437
438 /**
439 * Set up a binding table entry for use by stream output logic (transform
440 * feedback).
441 *
442 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
443 */
444 void
445 brw_update_sol_surface(struct brw_context *brw,
446 struct gl_buffer_object *buffer_obj,
447 uint32_t *out_offset, unsigned num_vector_components,
448 unsigned stride_dwords, unsigned offset_dwords)
449 {
450 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
451 uint32_t offset_bytes = 4 * offset_dwords;
452 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
453 offset_bytes,
454 buffer_obj->Size - offset_bytes);
455 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
456 out_offset);
457 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
458 size_t size_dwords = buffer_obj->Size / 4;
459 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
460
461 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
462 * too big to map using a single binding table entry?
463 */
464 assert((size_dwords - offset_dwords) / stride_dwords
465 <= BRW_MAX_NUM_BUFFER_ENTRIES);
466
467 if (size_dwords > offset_dwords + num_vector_components) {
468 /* There is room for at least 1 transform feedback output in the buffer.
469 * Compute the number of additional transform feedback outputs the
470 * buffer has room for.
471 */
472 buffer_size_minus_1 =
473 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
474 } else {
475 /* There isn't even room for a single transform feedback output in the
476 * buffer. We can't configure the binding table entry to prevent output
477 * entirely; we'll have to rely on the geometry shader to detect
478 * overflow. But to minimize the damage in case of a bug, set up the
479 * binding table entry to just allow a single output.
480 */
481 buffer_size_minus_1 = 0;
482 }
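   /* Split buffer_size_minus_1 across the Width, Height and Depth fields,
    * using the same bits-6:0 / 19:7 / 26:20 encoding as a buffer surface.
    */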
483 width = buffer_size_minus_1 & 0x7f;
484 height = (buffer_size_minus_1 & 0xfff80) >> 7;
485 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
486
487 switch (num_vector_components) {
488 case 1:
489 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
490 break;
491 case 2:
492 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
493 break;
494 case 3:
495 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
496 break;
497 case 4:
498 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
499 break;
500 default:
501 unreachable("Invalid vector size for transform feedback output");
502 }
503
504 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
505 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
506 surface_format << BRW_SURFACE_FORMAT_SHIFT |
507 BRW_SURFACE_RC_READ_WRITE;
508 surf[1] = bo->offset64 + offset_bytes; /* reloc */
509 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
510 height << BRW_SURFACE_HEIGHT_SHIFT);
511 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
512 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
513 surf[4] = 0;
514 surf[5] = 0;
515
516 /* Emit relocation to surface contents. */
517 drm_intel_bo_emit_reloc(brw->batch.bo,
518 *out_offset + 4,
519 bo, offset_bytes,
520 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
521 }
522
523 /* Creates a new WM constant buffer reflecting the current fragment program's
524 * constants, if needed by the fragment program.
525 *
526 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
527 * state atom.
528 */
529 static void
530 brw_upload_wm_pull_constants(struct brw_context *brw)
531 {
532 struct brw_stage_state *stage_state = &brw->wm.base;
533 /* BRW_NEW_FRAGMENT_PROGRAM */
534 struct brw_fragment_program *fp =
535 (struct brw_fragment_program *) brw->fragment_program;
536 /* BRW_NEW_FS_PROG_DATA */
537 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
538
539 /* _NEW_PROGRAM_CONSTANTS */
540 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
541 stage_state, prog_data, true);
542 }
543
544 const struct brw_tracked_state brw_wm_pull_constants = {
545 .dirty = {
546 .mesa = _NEW_PROGRAM_CONSTANTS,
547 .brw = BRW_NEW_BATCH |
548 BRW_NEW_FRAGMENT_PROGRAM |
549 BRW_NEW_FS_PROG_DATA,
550 },
551 .emit = brw_upload_wm_pull_constants,
552 };
553
554 /**
555 * Creates a null renderbuffer surface.
556 *
557 * This is used when the shader doesn't write to any color output. An FB
558 * write to target 0 will still be emitted, because that's how the thread is
559 * terminated (and computed depth is returned), so we need to have the
560  * hardware discard the target 0 color output.
561 */
562 static void
563 brw_emit_null_surface_state(struct brw_context *brw,
564 unsigned width,
565 unsigned height,
566 unsigned samples,
567 uint32_t *out_offset)
568 {
569 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
570 * Notes):
571 *
572 * A null surface will be used in instances where an actual surface is
573 * not bound. When a write message is generated to a null surface, no
574 * actual surface is written to. When a read message (including any
575 * sampling engine message) is generated to a null surface, the result
576 * is all zeros. Note that a null surface type is allowed to be used
577     *     with all messages, even if it is not specifically indicated as
578 * supported. All of the remaining fields in surface state are ignored
579 * for null surfaces, with the following exceptions:
580 *
581 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
582 * depth buffer’s corresponding state for all render target surfaces,
583 * including null.
584 *
585 * - Surface Format must be R8G8B8A8_UNORM.
586 */
587 unsigned surface_type = BRW_SURFACE_NULL;
588 drm_intel_bo *bo = NULL;
589 unsigned pitch_minus_1 = 0;
590 uint32_t multisampling_state = 0;
591 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
592 out_offset);
593
594 if (samples > 1) {
595 /* On Gen6, null render targets seem to cause GPU hangs when
596        * multisampling. So work around this problem by rendering into a dummy
597 * color buffer.
598 *
599 * To decrease the amount of memory needed by the workaround buffer, we
600 * set its pitch to 128 bytes (the width of a Y tile). This means that
601 * the amount of memory needed for the workaround buffer is
602 * (width_in_tiles + height_in_tiles - 1) tiles.
603 *
604 * Note that since the workaround buffer will be interpreted by the
605 * hardware as an interleaved multisampled buffer, we need to compute
606 * width_in_tiles and height_in_tiles by dividing the width and height
607 * by 16 rather than the normal Y-tile size of 32.
608 */
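      /* E.g. for a 1024x768 multisampled framebuffer this gives
       * width_in_tiles = 64 and height_in_tiles = 48, so the workaround BO
       * is (64 + 48 - 1) * 4096 bytes (444 KB) rather than a full-sized
       * dummy color buffer.
       */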
609 unsigned width_in_tiles = ALIGN(width, 16) / 16;
610 unsigned height_in_tiles = ALIGN(height, 16) / 16;
611 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
612 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
613 size_needed);
614 bo = brw->wm.multisampled_null_render_target_bo;
615 surface_type = BRW_SURFACE_2D;
616 pitch_minus_1 = 127;
617 multisampling_state = brw_get_surface_num_multisamples(samples);
618 }
619
620 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
621 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
622 if (brw->gen < 6) {
623 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
624 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
625 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
626 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
627 }
628 surf[1] = bo ? bo->offset64 : 0;
629 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
630 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
631
632 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
633 * Notes):
634 *
635 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
636 */
637 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
638 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
639 surf[4] = multisampling_state;
640 surf[5] = 0;
641
642 if (bo) {
643 drm_intel_bo_emit_reloc(brw->batch.bo,
644 *out_offset + 4,
645 bo, 0,
646 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
647 }
648 }
649
650 /**
651 * Sets up a surface state structure to point at the given region.
652 * While it is only used for the front/back buffer currently, it should be
653  * usable for further buffers when doing ARB_draw_buffers support.
654 */
655 static uint32_t
656 brw_update_renderbuffer_surface(struct brw_context *brw,
657 struct gl_renderbuffer *rb,
658 bool layered, unsigned unit,
659 uint32_t surf_index)
660 {
661 struct gl_context *ctx = &brw->ctx;
662 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
663 struct intel_mipmap_tree *mt = irb->mt;
664 uint32_t *surf;
665 uint32_t tile_x, tile_y;
666 uint32_t format = 0;
667 uint32_t offset;
668 /* _NEW_BUFFERS */
669 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
670 /* BRW_NEW_FS_PROG_DATA */
671
672 assert(!layered);
673
674 if (rb->TexImage && !brw->has_surface_tile_offset) {
675 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
676
677 if (tile_x != 0 || tile_y != 0) {
678 /* Original gen4 hardware couldn't draw to a non-tile-aligned
679 * destination in a miptree unless you actually setup your renderbuffer
680 * as a miptree and used the fragile lod/array_index/etc. controls to
681 * select the image. So, instead, we just make a new single-level
682 * miptree and render into that.
683 */
684 intel_renderbuffer_move_to_temp(brw, irb, false);
685 mt = irb->mt;
686 }
687 }
688
689 intel_miptree_used_for_rendering(irb->mt);
690
691 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
692
693 format = brw->render_target_format[rb_format];
694 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
695 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
696 __func__, _mesa_get_format_name(rb_format));
697 }
698
699 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
700 format << BRW_SURFACE_FORMAT_SHIFT);
701
702 /* reloc */
703 assert(mt->offset % mt->cpp == 0);
704 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
705 mt->bo->offset64 + mt->offset);
706
707 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
708 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
709
710 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
711 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
712
713 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
714
715 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
716    /* Note that the low bits of these offsets cannot be expressed in the
717     * surface state, hence the alignment asserts below.
718 */
719 assert(tile_x % 4 == 0);
720 assert(tile_y % 2 == 0);
721 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
722 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
723 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
724
725 if (brw->gen < 6) {
726 /* _NEW_COLOR */
727 if (!ctx->Color.ColorLogicOpEnabled &&
728 (ctx->Color.BlendEnabled & (1 << unit)))
729 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
730
731 if (!ctx->Color.ColorMask[unit][0])
732 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
733 if (!ctx->Color.ColorMask[unit][1])
734 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
735 if (!ctx->Color.ColorMask[unit][2])
736 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
737
738       /* Disable writes to the alpha component when the renderbuffer has no
739        * alpha bits (XRGB) or alpha writes are masked off.
740 */
741 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
742 !ctx->Color.ColorMask[unit][3]) {
743 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
744 }
745 }
746
747 drm_intel_bo_emit_reloc(brw->batch.bo,
748 offset + 4,
749 mt->bo,
750 surf[1] - mt->bo->offset64,
751 I915_GEM_DOMAIN_RENDER,
752 I915_GEM_DOMAIN_RENDER);
753
754 return offset;
755 }
756
757 /**
758 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
759 */
760 void
761 brw_update_renderbuffer_surfaces(struct brw_context *brw,
762 const struct gl_framebuffer *fb,
763 uint32_t render_target_start,
764 uint32_t *surf_offset)
765 {
766 GLuint i;
767 const unsigned int w = _mesa_geometric_width(fb);
768 const unsigned int h = _mesa_geometric_height(fb);
769 const unsigned int s = _mesa_geometric_samples(fb);
770
771 /* Update surfaces for drawing buffers */
772 if (fb->_NumColorDrawBuffers >= 1) {
773 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
774 const uint32_t surf_index = render_target_start + i;
775
776 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
777 surf_offset[surf_index] =
778 brw->vtbl.update_renderbuffer_surface(
779 brw, fb->_ColorDrawBuffers[i],
780 _mesa_geometric_layers(fb) > 0, i, surf_index);
781 } else {
782 brw->vtbl.emit_null_surface_state(brw, w, h, s,
783 &surf_offset[surf_index]);
784 }
785 }
786 } else {
787 const uint32_t surf_index = render_target_start;
788 brw->vtbl.emit_null_surface_state(brw, w, h, s,
789 &surf_offset[surf_index]);
790 }
791 }
792
793 static void
794 update_renderbuffer_surfaces(struct brw_context *brw)
795 {
796 const struct gl_context *ctx = &brw->ctx;
797
798 /* _NEW_BUFFERS | _NEW_COLOR */
799 const struct gl_framebuffer *fb = ctx->DrawBuffer;
800 brw_update_renderbuffer_surfaces(
801 brw, fb,
802 brw->wm.prog_data->binding_table.render_target_start,
803 brw->wm.base.surf_offset);
804 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
805 }
806
807 const struct brw_tracked_state brw_renderbuffer_surfaces = {
808 .dirty = {
809 .mesa = _NEW_BUFFERS |
810 _NEW_COLOR,
811 .brw = BRW_NEW_BATCH |
812 BRW_NEW_FS_PROG_DATA,
813 },
814 .emit = update_renderbuffer_surfaces,
815 };
816
817 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
818 .dirty = {
819 .mesa = _NEW_BUFFERS,
820 .brw = BRW_NEW_BATCH,
821 },
822 .emit = update_renderbuffer_surfaces,
823 };
824
825
826 static void
827 update_stage_texture_surfaces(struct brw_context *brw,
828 const struct gl_program *prog,
829 struct brw_stage_state *stage_state,
830 bool for_gather)
831 {
832 if (!prog)
833 return;
834
835 struct gl_context *ctx = &brw->ctx;
836
837 uint32_t *surf_offset = stage_state->surf_offset;
838
839 /* BRW_NEW_*_PROG_DATA */
840 if (for_gather)
841 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
842 else
843 surf_offset += stage_state->prog_data->binding_table.texture_start;
844
845 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
846 for (unsigned s = 0; s < num_samplers; s++) {
847 surf_offset[s] = 0;
848
849 if (prog->SamplersUsed & (1 << s)) {
850 const unsigned unit = prog->SamplerUnits[s];
851
852 /* _NEW_TEXTURE */
853 if (ctx->Texture.Unit[unit]._Current) {
854 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
855 }
856 }
857 }
858 }
859
860
861 /**
862 * Construct SURFACE_STATE objects for enabled textures.
863 */
864 static void
865 brw_update_texture_surfaces(struct brw_context *brw)
866 {
867 /* BRW_NEW_VERTEX_PROGRAM */
868 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
869
870 /* BRW_NEW_GEOMETRY_PROGRAM */
871 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
872
873 /* BRW_NEW_FRAGMENT_PROGRAM */
874 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
875
876 /* BRW_NEW_COMPUTE_PROGRAM */
877 struct gl_program *cs = (struct gl_program *) brw->compute_program;
878
879 /* _NEW_TEXTURE */
880 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
881 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
882 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
883 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
884
885    /* Emit an alternate set of surface states for gather. This
886     * allows the surface format to be overridden for only the
887     * gather4 messages. */
888 if (brw->gen < 8) {
889 if (vs && vs->UsesGather)
890 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
891 if (gs && gs->UsesGather)
892 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
893 if (fs && fs->UsesGather)
894 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
895 if (cs && cs->UsesGather)
896 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
897 }
898
899 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
900 }
901
902 const struct brw_tracked_state brw_texture_surfaces = {
903 .dirty = {
904 .mesa = _NEW_TEXTURE,
905 .brw = BRW_NEW_BATCH |
906 BRW_NEW_COMPUTE_PROGRAM |
907 BRW_NEW_FRAGMENT_PROGRAM |
908 BRW_NEW_FS_PROG_DATA |
909 BRW_NEW_GEOMETRY_PROGRAM |
910 BRW_NEW_GS_PROG_DATA |
911 BRW_NEW_TEXTURE_BUFFER |
912 BRW_NEW_VERTEX_PROGRAM |
913 BRW_NEW_VS_PROG_DATA,
914 },
915 .emit = brw_update_texture_surfaces,
916 };
917
918 void
919 brw_upload_ubo_surfaces(struct brw_context *brw,
920 struct gl_shader *shader,
921 struct brw_stage_state *stage_state,
922 struct brw_stage_prog_data *prog_data,
923 bool dword_pitch)
924 {
925 struct gl_context *ctx = &brw->ctx;
926
927 if (!shader)
928 return;
929
930 uint32_t *ubo_surf_offsets =
931 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
932
933 for (int i = 0; i < shader->NumUniformBlocks; i++) {
934 struct gl_uniform_buffer_binding *binding =
935 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
936
937 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
938 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
939 } else {
940 struct intel_buffer_object *intel_bo =
941 intel_buffer_object(binding->BufferObject);
942 drm_intel_bo *bo =
943 intel_bufferobj_buffer(brw, intel_bo,
944 binding->Offset,
945 binding->BufferObject->Size - binding->Offset);
946 brw_create_constant_surface(brw, bo, binding->Offset,
947 binding->BufferObject->Size - binding->Offset,
948 &ubo_surf_offsets[i],
949 dword_pitch);
950 }
951 }
952
953 uint32_t *ssbo_surf_offsets =
954 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
955
956 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
957 struct gl_shader_storage_buffer_binding *binding =
958 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
959
960 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
961 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
962 } else {
963 struct intel_buffer_object *intel_bo =
964 intel_buffer_object(binding->BufferObject);
965 drm_intel_bo *bo =
966 intel_bufferobj_buffer(brw, intel_bo,
967 binding->Offset,
968 binding->BufferObject->Size - binding->Offset);
969 brw_create_buffer_surface(brw, bo, binding->Offset,
970 binding->BufferObject->Size - binding->Offset,
971 &ssbo_surf_offsets[i],
972 dword_pitch);
973 }
974 }
975
976 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
977 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
978 }
979
980 static void
981 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
982 {
983 struct gl_context *ctx = &brw->ctx;
984 /* _NEW_PROGRAM */
985 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
986
987 if (!prog)
988 return;
989
990 /* BRW_NEW_FS_PROG_DATA */
991 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
992 &brw->wm.base, &brw->wm.prog_data->base, true);
993 }
994
995 const struct brw_tracked_state brw_wm_ubo_surfaces = {
996 .dirty = {
997 .mesa = _NEW_PROGRAM,
998 .brw = BRW_NEW_BATCH |
999 BRW_NEW_FS_PROG_DATA |
1000 BRW_NEW_UNIFORM_BUFFER,
1001 },
1002 .emit = brw_upload_wm_ubo_surfaces,
1003 };
1004
1005 static void
1006 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1007 {
1008 struct gl_context *ctx = &brw->ctx;
1009 /* _NEW_PROGRAM */
1010 struct gl_shader_program *prog =
1011 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1012
1013 if (!prog)
1014 return;
1015
1016 /* BRW_NEW_CS_PROG_DATA */
1017 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1018 &brw->cs.base, &brw->cs.prog_data->base, true);
1019 }
1020
1021 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1022 .dirty = {
1023 .mesa = _NEW_PROGRAM,
1024 .brw = BRW_NEW_BATCH |
1025 BRW_NEW_CS_PROG_DATA |
1026 BRW_NEW_UNIFORM_BUFFER,
1027 },
1028 .emit = brw_upload_cs_ubo_surfaces,
1029 };
1030
1031 void
1032 brw_upload_abo_surfaces(struct brw_context *brw,
1033 struct gl_shader *shader,
1034 struct brw_stage_state *stage_state,
1035 struct brw_stage_prog_data *prog_data)
1036 {
1037 struct gl_context *ctx = &brw->ctx;
1038 uint32_t *surf_offsets =
1039 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1040
1041 if (shader && shader->NumAtomicBuffers) {
1042 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1043 struct gl_atomic_buffer_binding *binding =
1044 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1045 struct intel_buffer_object *intel_bo =
1046 intel_buffer_object(binding->BufferObject);
1047 drm_intel_bo *bo = intel_bufferobj_buffer(
1048 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1049
1050 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1051 binding->Offset, BRW_SURFACEFORMAT_RAW,
1052 bo->size - binding->Offset, 1, true);
1053 }
1054
1055 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1056 }
1057 }
1058
1059 static void
1060 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1061 {
1062 struct gl_context *ctx = &brw->ctx;
1063 /* _NEW_PROGRAM */
1064 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
1065
1066 if (prog) {
1067 /* BRW_NEW_FS_PROG_DATA */
1068 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1069 &brw->wm.base, &brw->wm.prog_data->base);
1070 }
1071 }
1072
1073 const struct brw_tracked_state brw_wm_abo_surfaces = {
1074 .dirty = {
1075 .mesa = _NEW_PROGRAM,
1076 .brw = BRW_NEW_ATOMIC_BUFFER |
1077 BRW_NEW_BATCH |
1078 BRW_NEW_FS_PROG_DATA,
1079 },
1080 .emit = brw_upload_wm_abo_surfaces,
1081 };
1082
1083 static void
1084 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1085 {
1086 struct gl_context *ctx = &brw->ctx;
1087 /* _NEW_PROGRAM */
1088 struct gl_shader_program *prog =
1089 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1090
1091 if (prog) {
1092 /* BRW_NEW_CS_PROG_DATA */
1093 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1094 &brw->cs.base, &brw->cs.prog_data->base);
1095 }
1096 }
1097
1098 const struct brw_tracked_state brw_cs_abo_surfaces = {
1099 .dirty = {
1100 .mesa = _NEW_PROGRAM,
1101 .brw = BRW_NEW_ATOMIC_BUFFER |
1102 BRW_NEW_BATCH |
1103 BRW_NEW_CS_PROG_DATA,
1104 },
1105 .emit = brw_upload_cs_abo_surfaces,
1106 };
1107
1108 static void
1109 brw_upload_cs_image_surfaces(struct brw_context *brw)
1110 {
1111 struct gl_context *ctx = &brw->ctx;
1112 /* _NEW_PROGRAM */
1113 struct gl_shader_program *prog =
1114 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1115
1116 if (prog) {
1117 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1118 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1119 &brw->cs.base, &brw->cs.prog_data->base);
1120 }
1121 }
1122
1123 const struct brw_tracked_state brw_cs_image_surfaces = {
1124 .dirty = {
1125 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1126 .brw = BRW_NEW_BATCH |
1127 BRW_NEW_CS_PROG_DATA |
1128 BRW_NEW_IMAGE_UNITS
1129 },
1130 .emit = brw_upload_cs_image_surfaces,
1131 };
1132
1133 static uint32_t
1134 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1135 {
1136 if (access == GL_WRITE_ONLY) {
1137 return brw_format_for_mesa_format(format);
1138 } else {
1139 /* Typed surface reads support a very limited subset of the shader
1140        * image formats. Translate the requested format into the closest one the
1141 * hardware supports.
1142 */
1143 if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
1144 (_mesa_get_format_bytes(format) >= 8 &&
1145 (brw->gen == 7 && !brw->is_haswell)))
1146 return BRW_SURFACEFORMAT_RAW;
1147 else
1148 return brw_format_for_mesa_format(
1149 brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
1150 }
1151 }
1152
1153 static void
1154 update_default_image_param(struct brw_context *brw,
1155 struct gl_image_unit *u,
1156 unsigned surface_idx,
1157 struct brw_image_param *param)
1158 {
1159 memset(param, 0, sizeof(*param));
1160 param->surface_idx = surface_idx;
1161 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1162 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1163 * detailed explanation of these parameters.
1164 */
1165 param->swizzling[0] = 0xff;
1166 param->swizzling[1] = 0xff;
1167 }
1168
1169 static void
1170 update_buffer_image_param(struct brw_context *brw,
1171 struct gl_image_unit *u,
1172 unsigned surface_idx,
1173 struct brw_image_param *param)
1174 {
1175 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1176
1177 update_default_image_param(brw, u, surface_idx, param);
1178
1179 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1180 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1181 }
1182
1183 static void
1184 update_texture_image_param(struct brw_context *brw,
1185 struct gl_image_unit *u,
1186 unsigned surface_idx,
1187 struct brw_image_param *param)
1188 {
1189 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1190
1191 update_default_image_param(brw, u, surface_idx, param);
1192
1193 param->size[0] = minify(mt->logical_width0, u->Level);
1194 param->size[1] = minify(mt->logical_height0, u->Level);
1195 param->size[2] = (!u->Layered ? 1 :
1196 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1197 u->TexObj->Target == GL_TEXTURE_3D ?
1198 minify(mt->logical_depth0, u->Level) :
1199 mt->logical_depth0);
1200
1201 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1202 &param->offset[0],
1203 &param->offset[1]);
1204
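   /* stride[0] is the texel size in bytes, stride[1] the row pitch in
    * texels, and stride[2]/stride[3] the horizontal/vertical slice pitches.
    */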
1205 param->stride[0] = mt->cpp;
1206 param->stride[1] = mt->pitch / mt->cpp;
1207 param->stride[2] =
1208 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1209 param->stride[3] =
1210 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1211
1212 if (mt->tiling == I915_TILING_X) {
1213 /* An X tile is a rectangular block of 512x8 bytes. */
1214 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1215 param->tiling[1] = _mesa_logbase2(8);
1216
1217 if (brw->has_swizzling) {
1218 /* Right shifts required to swizzle bits 9 and 10 of the memory
1219 * address with bit 6.
1220 */
1221 param->swizzling[0] = 3;
1222 param->swizzling[1] = 4;
1223 }
1224 } else if (mt->tiling == I915_TILING_Y) {
1225       /* The layout of a Y-tiled surface in memory isn't fundamentally
1226        * different from that of an X-tiled surface; we simply pretend that the
1227        * surface is broken up into a number of smaller 16Bx32 tiles, each one
1228        * arranged in X-major order just as in X-tiling.
1229 */
1230 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1231 param->tiling[1] = _mesa_logbase2(32);
1232
1233 if (brw->has_swizzling) {
1234 /* Right shift required to swizzle bit 9 of the memory address with
1235 * bit 6.
1236 */
1237 param->swizzling[0] = 3;
1238 }
1239 }
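   /* For example, with cpp = 4 the X-tiled case gives
    * tiling[] = { log2(128), log2(8) }, i.e. a 128x8-texel tile, and the
    * Y-tiled case gives { log2(4), log2(32) }, i.e. a 4x32-texel pseudo-tile.
    */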
1240
1241 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1242 * address calculation algorithm (emit_address_calculation() in
1243 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1244 * modulus equal to the LOD.
1245 */
1246 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1247 0);
1248 }
1249
1250 static void
1251 update_image_surface(struct brw_context *brw,
1252 struct gl_image_unit *u,
1253 GLenum access,
1254 unsigned surface_idx,
1255 uint32_t *surf_offset,
1256 struct brw_image_param *param)
1257 {
1258 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1259 struct gl_texture_object *obj = u->TexObj;
1260 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1261
1262 if (obj->Target == GL_TEXTURE_BUFFER) {
1263 struct intel_buffer_object *intel_obj =
1264 intel_buffer_object(obj->BufferObject);
1265 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1266 _mesa_get_format_bytes(u->_ActualFormat));
1267
1268 brw->vtbl.emit_buffer_surface_state(
1269 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1270 format, intel_obj->Base.Size / texel_size, texel_size,
1271 access != GL_READ_ONLY);
1272
1273 update_buffer_image_param(brw, u, surface_idx, param);
1274
1275 } else {
1276 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1277 struct intel_mipmap_tree *mt = intel_obj->mt;
1278
1279 if (format == BRW_SURFACEFORMAT_RAW) {
1280 brw->vtbl.emit_buffer_surface_state(
1281 brw, surf_offset, mt->bo, mt->offset,
1282 format, mt->bo->size - mt->offset, 1 /* pitch */,
1283 access != GL_READ_ONLY);
1284
1285 } else {
1286 const unsigned min_layer = obj->MinLayer + u->_Layer;
1287 const unsigned min_level = obj->MinLevel + u->Level;
1288 const unsigned num_layers = (!u->Layered ? 1 :
1289 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1290 mt->logical_depth0);
1291 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1292 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1293 GL_TEXTURE_2D_ARRAY : obj->Target);
1294
1295 brw->vtbl.emit_texture_surface_state(
1296 brw, mt, target,
1297 min_layer, min_layer + num_layers,
1298 min_level, min_level + 1,
1299 format, SWIZZLE_XYZW,
1300 surf_offset, access != GL_READ_ONLY, false);
1301 }
1302
1303 update_texture_image_param(brw, u, surface_idx, param);
1304 }
1305
1306 } else {
1307 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1308 update_default_image_param(brw, u, surface_idx, param);
1309 }
1310 }
1311
1312 void
1313 brw_upload_image_surfaces(struct brw_context *brw,
1314 struct gl_shader *shader,
1315 struct brw_stage_state *stage_state,
1316 struct brw_stage_prog_data *prog_data)
1317 {
1318 struct gl_context *ctx = &brw->ctx;
1319
1320 if (shader && shader->NumImages) {
1321 for (unsigned i = 0; i < shader->NumImages; i++) {
1322 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1323 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1324
1325 update_image_surface(brw, u, shader->ImageAccess[i],
1326 surf_idx,
1327 &stage_state->surf_offset[surf_idx],
1328 &prog_data->image_param[i]);
1329 }
1330
1331 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1332 }
1333 }
1334
1335 static void
1336 brw_upload_wm_image_surfaces(struct brw_context *brw)
1337 {
1338 struct gl_context *ctx = &brw->ctx;
1339 /* BRW_NEW_FRAGMENT_PROGRAM */
1340 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
1341
1342 if (prog) {
1343 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1344 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1345 &brw->wm.base, &brw->wm.prog_data->base);
1346 }
1347 }
1348
1349 const struct brw_tracked_state brw_wm_image_surfaces = {
1350 .dirty = {
1351 .mesa = _NEW_TEXTURE,
1352 .brw = BRW_NEW_BATCH |
1353 BRW_NEW_FRAGMENT_PROGRAM |
1354 BRW_NEW_FS_PROG_DATA |
1355 BRW_NEW_IMAGE_UNITS
1356 },
1357 .emit = brw_upload_wm_image_surfaces,
1358 };
1359
1360 void
1361 gen4_init_vtable_surface_functions(struct brw_context *brw)
1362 {
1363 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1364 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1365 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1366 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1367 }
1368
1369 static void
1370 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1371 {
1372 struct gl_context *ctx = &brw->ctx;
1373 /* _NEW_PROGRAM */
1374 struct gl_shader_program *prog =
1375 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1376
1377 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1378 const unsigned surf_idx =
1379 brw->cs.prog_data->binding_table.work_groups_start;
1380 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1381 drm_intel_bo *bo;
1382 uint32_t bo_offset;
1383
1384 if (brw->compute.num_work_groups_bo == NULL) {
1385 bo = NULL;
1386 intel_upload_data(brw,
1387 (void *)brw->compute.num_work_groups,
1388 3 * sizeof(GLuint),
1389 sizeof(GLuint),
1390 &bo,
1391 &bo_offset);
1392 } else {
1393 bo = brw->compute.num_work_groups_bo;
1394 bo_offset = brw->compute.num_work_groups_offset;
1395 }
1396
1397 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1398 bo, bo_offset,
1399 BRW_SURFACEFORMAT_RAW,
1400 3 * sizeof(GLuint), 1, true);
1401 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1402 }
1403 }
1404
1405 const struct brw_tracked_state brw_cs_work_groups_surface = {
1406 .dirty = {
1407 .brw = BRW_NEW_CS_WORK_GROUPS
1408 },
1409 .emit = brw_upload_cs_work_groups_surface,
1410 };