i965/wm: use binding size for ubo/ssbo when automatic size is unset
[mesa.git] / src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "intel_mipmap_tree.h"
43 #include "intel_batchbuffer.h"
44 #include "intel_tex.h"
45 #include "intel_fbo.h"
46 #include "intel_buffer_objects.h"
47
48 #include "brw_context.h"
49 #include "brw_state.h"
50 #include "brw_defines.h"
51 #include "brw_wm.h"
52
53 GLuint
54 translate_tex_target(GLenum target)
55 {
56 switch (target) {
57 case GL_TEXTURE_1D:
58 case GL_TEXTURE_1D_ARRAY_EXT:
59 return BRW_SURFACE_1D;
60
61 case GL_TEXTURE_RECTANGLE_NV:
62 return BRW_SURFACE_2D;
63
64 case GL_TEXTURE_2D:
65 case GL_TEXTURE_2D_ARRAY_EXT:
66 case GL_TEXTURE_EXTERNAL_OES:
67 case GL_TEXTURE_2D_MULTISAMPLE:
68 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
69 return BRW_SURFACE_2D;
70
71 case GL_TEXTURE_3D:
72 return BRW_SURFACE_3D;
73
74 case GL_TEXTURE_CUBE_MAP:
75 case GL_TEXTURE_CUBE_MAP_ARRAY:
76 return BRW_SURFACE_CUBE;
77
78 default:
79 unreachable("not reached");
80 }
81 }
82
83 uint32_t
84 brw_get_surface_tiling_bits(uint32_t tiling)
85 {
86 switch (tiling) {
87 case I915_TILING_X:
88 return BRW_SURFACE_TILED;
89 case I915_TILING_Y:
90 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
91 default:
92 return 0;
93 }
94 }
95
96
97 uint32_t
98 brw_get_surface_num_multisamples(unsigned num_samples)
99 {
100 if (num_samples > 1)
101 return BRW_SURFACE_MULTISAMPLECOUNT_4;
102 else
103 return BRW_SURFACE_MULTISAMPLECOUNT_1;
104 }
105
106 void
107 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
108 bool is_render_target,
109 unsigned *width, unsigned *height,
110 unsigned *pitch, uint32_t *tiling, unsigned *format)
111 {
112 static const unsigned halign_stencil = 8;
113
114 /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
115 * there are half as many rows.
116 * In addition, mip-levels are accessed manually by the program and
117 * therefore the surface is set up to cover all the mip-levels for one slice.
118 * (Hardware is still used to access individual slices).
119 */
120 *tiling = I915_TILING_Y;
121 *pitch = mt->pitch * 2;
122 *width = ALIGN(mt->total_width, halign_stencil) * 2;
123 *height = (mt->total_height / mt->physical_depth0) / 2;
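/* Worked example (illustrative numbers only): a W-tiled stencil miptree with
 * pitch 128, total_width 100, total_height 512 and physical_depth0 4 is
 * exposed as a Y-tiled surface with pitch 256, width ALIGN(100, 8) * 2 = 208
 * and height (512 / 4) / 2 = 64.
 */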
124
125 if (is_render_target) {
126 *format = BRW_SURFACEFORMAT_R8_UINT;
127 }
128 }
129
130
131 /**
132 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
133 * swizzling.
134 */
135 int
136 brw_get_texture_swizzle(const struct gl_context *ctx,
137 const struct gl_texture_object *t)
138 {
139 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
140
141 int swizzles[SWIZZLE_NIL + 1] = {
142 SWIZZLE_X,
143 SWIZZLE_Y,
144 SWIZZLE_Z,
145 SWIZZLE_W,
146 SWIZZLE_ZERO,
147 SWIZZLE_ONE,
148 SWIZZLE_NIL
149 };
150
151 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
152 img->_BaseFormat == GL_DEPTH_STENCIL) {
153 GLenum depth_mode = t->DepthMode;
154
155 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
156 * with depth component data specified with a sized internal format.
157 * Otherwise, it's left at the old default, GL_LUMINANCE.
158 */
159 if (_mesa_is_gles3(ctx) &&
160 img->InternalFormat != GL_DEPTH_COMPONENT &&
161 img->InternalFormat != GL_DEPTH_STENCIL) {
162 depth_mode = GL_RED;
163 }
164
165 switch (depth_mode) {
166 case GL_ALPHA:
167 swizzles[0] = SWIZZLE_ZERO;
168 swizzles[1] = SWIZZLE_ZERO;
169 swizzles[2] = SWIZZLE_ZERO;
170 swizzles[3] = SWIZZLE_X;
171 break;
172 case GL_LUMINANCE:
173 swizzles[0] = SWIZZLE_X;
174 swizzles[1] = SWIZZLE_X;
175 swizzles[2] = SWIZZLE_X;
176 swizzles[3] = SWIZZLE_ONE;
177 break;
178 case GL_INTENSITY:
179 swizzles[0] = SWIZZLE_X;
180 swizzles[1] = SWIZZLE_X;
181 swizzles[2] = SWIZZLE_X;
182 swizzles[3] = SWIZZLE_X;
183 break;
184 case GL_RED:
185 swizzles[0] = SWIZZLE_X;
186 swizzles[1] = SWIZZLE_ZERO;
187 swizzles[2] = SWIZZLE_ZERO;
188 swizzles[3] = SWIZZLE_ONE;
189 break;
190 }
191 }
192
193 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
194
195 /* If the texture's format is alpha-only, force R, G, and B to
196 * 0.0. Similarly, if the texture's format has no alpha channel,
197 * force the alpha value read to 1.0. This allows for the
198 * implementation to use an RGBA texture for any of these formats
199 * without leaking any unexpected values.
200 */
201 switch (img->_BaseFormat) {
202 case GL_ALPHA:
203 swizzles[0] = SWIZZLE_ZERO;
204 swizzles[1] = SWIZZLE_ZERO;
205 swizzles[2] = SWIZZLE_ZERO;
206 break;
207 case GL_LUMINANCE:
208 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
209 swizzles[0] = SWIZZLE_X;
210 swizzles[1] = SWIZZLE_X;
211 swizzles[2] = SWIZZLE_X;
212 swizzles[3] = SWIZZLE_ONE;
213 }
214 break;
215 case GL_LUMINANCE_ALPHA:
216 if (datatype == GL_SIGNED_NORMALIZED) {
217 swizzles[0] = SWIZZLE_X;
218 swizzles[1] = SWIZZLE_X;
219 swizzles[2] = SWIZZLE_X;
220 swizzles[3] = SWIZZLE_W;
221 }
222 break;
223 case GL_INTENSITY:
224 if (datatype == GL_SIGNED_NORMALIZED) {
225 swizzles[0] = SWIZZLE_X;
226 swizzles[1] = SWIZZLE_X;
227 swizzles[2] = SWIZZLE_X;
228 swizzles[3] = SWIZZLE_X;
229 }
230 break;
231 case GL_RED:
232 case GL_RG:
233 case GL_RGB:
234 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
235 swizzles[3] = SWIZZLE_ONE;
236 break;
237 }
238
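/* Compose the application's swizzle (t->_Swizzle) with the format/depth-mode
 * swizzles computed above: each channel of the user swizzle selects which of
 * the computed swizzles feeds the corresponding output channel.
 */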
239 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
240 swizzles[GET_SWZ(t->_Swizzle, 1)],
241 swizzles[GET_SWZ(t->_Swizzle, 2)],
242 swizzles[GET_SWZ(t->_Swizzle, 3)]);
243 }
244
245 static void
246 gen4_emit_buffer_surface_state(struct brw_context *brw,
247 uint32_t *out_offset,
248 drm_intel_bo *bo,
249 unsigned buffer_offset,
250 unsigned surface_format,
251 unsigned buffer_size,
252 unsigned pitch,
253 bool rw)
254 {
255 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
256 6 * 4, 32, out_offset);
257 memset(surf, 0, 6 * 4);
258
259 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
260 surface_format << BRW_SURFACE_FORMAT_SHIFT |
261 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
262 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
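/* A buffer surface has no 2D layout of its own, so the element count is
 * packed into the Width/Height/Depth fields below: bits 6:0 go into Width,
 * bits 19:7 into Height and bits 26:20 into Depth.
 */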
263 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
264 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
265 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
266 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
267
268 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
269 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
270 * physical cache. It is mapped in hardware to the sampler cache."
271 */
272 if (bo) {
273 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
274 bo, buffer_offset,
275 I915_GEM_DOMAIN_SAMPLER,
276 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
277 }
278 }
279
280 void
281 brw_update_buffer_texture_surface(struct gl_context *ctx,
282 unsigned unit,
283 uint32_t *surf_offset)
284 {
285 struct brw_context *brw = brw_context(ctx);
286 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
287 struct intel_buffer_object *intel_obj =
288 intel_buffer_object(tObj->BufferObject);
289 uint32_t size = tObj->BufferSize;
290 drm_intel_bo *bo = NULL;
291 mesa_format format = tObj->_BufferObjectFormat;
292 uint32_t brw_format = brw_format_for_mesa_format(format);
293 int texel_size = _mesa_get_format_bytes(format);
294
295 if (intel_obj) {
296 size = MIN2(size, intel_obj->Base.Size);
297 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
298 }
299
300 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
301 _mesa_problem(NULL, "bad format %s for texture buffer\n",
302 _mesa_get_format_name(format));
303 }
304
305 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
306 tObj->BufferOffset,
307 brw_format,
308 size / texel_size,
309 texel_size,
310 false /* rw */);
311 }
312
313 static void
314 brw_update_texture_surface(struct gl_context *ctx,
315 unsigned unit,
316 uint32_t *surf_offset,
317 bool for_gather)
318 {
319 struct brw_context *brw = brw_context(ctx);
320 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
321 struct intel_texture_object *intelObj = intel_texture_object(tObj);
322 struct intel_mipmap_tree *mt = intelObj->mt;
323 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
324 uint32_t *surf;
325
326 /* BRW_NEW_TEXTURE_BUFFER */
327 if (tObj->Target == GL_TEXTURE_BUFFER) {
328 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
329 return;
330 }
331
332 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
333 6 * 4, 32, surf_offset);
334
335 uint32_t tex_format = translate_tex_format(brw, mt->format,
336 sampler->sRGBDecode);
337
338 if (for_gather) {
339 /* Sandybridge's gather4 message is broken for integer formats.
340 * To work around this, we pretend the surface is UNORM for
341 * 8 or 16-bit formats, and emit shader instructions to recover
342 * the real INT/UINT value. For 32-bit formats, we pretend
343 * the surface is FLOAT, and simply reinterpret the resulting
344 * bits.
345 */
346 switch (tex_format) {
347 case BRW_SURFACEFORMAT_R8_SINT:
348 case BRW_SURFACEFORMAT_R8_UINT:
349 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
350 break;
351
352 case BRW_SURFACEFORMAT_R16_SINT:
353 case BRW_SURFACEFORMAT_R16_UINT:
354 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
355 break;
356
357 case BRW_SURFACEFORMAT_R32_SINT:
358 case BRW_SURFACEFORMAT_R32_UINT:
359 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
360 break;
361
362 default:
363 break;
364 }
365 }
366
367 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
368 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
369 BRW_SURFACE_CUBEFACE_ENABLES |
370 tex_format << BRW_SURFACE_FORMAT_SHIFT);
371
372 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
373
374 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
375 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
376 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
377
378 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
379 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
380 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
381
382 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
383 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
384
385 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
386
387 /* Emit relocation to surface contents */
388 drm_intel_bo_emit_reloc(brw->batch.bo,
389 *surf_offset + 4,
390 mt->bo,
391 surf[1] - mt->bo->offset64,
392 I915_GEM_DOMAIN_SAMPLER, 0);
393 }
394
395 /**
396 * Create the constant buffer surface. Vertex/fragment shader constants will be
397 * read from this buffer with Data Port Read instructions/messages.
398 */
399 void
400 brw_create_constant_surface(struct brw_context *brw,
401 drm_intel_bo *bo,
402 uint32_t offset,
403 uint32_t size,
404 uint32_t *out_offset)
405 {
406 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
407 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
408 size, 1, false);
409 }
410
411 /**
412 * Create the buffer surface. Shader buffer variables will be
413 * read from / written to this buffer with Data Port Read/Write
414 * instructions/messages.
415 */
416 void
417 brw_create_buffer_surface(struct brw_context *brw,
418 drm_intel_bo *bo,
419 uint32_t offset,
420 uint32_t size,
421 uint32_t *out_offset)
422 {
423 /* Use a raw surface so we can reuse existing untyped read/write/atomic
424 * messages. We need these specifically for the fragment shader since they
425 * include a pixel mask header that we need in order to ensure correct
426 * behavior with helper invocations, which cannot write to the buffer.
427 */
428 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
429 BRW_SURFACEFORMAT_RAW,
430 size, 1, true);
431 }
432
433 /**
434 * Set up a binding table entry for use by stream output logic (transform
435 * feedback).
436 *
437 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
438 */
439 void
440 brw_update_sol_surface(struct brw_context *brw,
441 struct gl_buffer_object *buffer_obj,
442 uint32_t *out_offset, unsigned num_vector_components,
443 unsigned stride_dwords, unsigned offset_dwords)
444 {
445 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
446 uint32_t offset_bytes = 4 * offset_dwords;
447 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
448 offset_bytes,
449 buffer_obj->Size - offset_bytes);
450 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
451 out_offset);
452 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
453 size_t size_dwords = buffer_obj->Size / 4;
454 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
455
456 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
457 * too big to map using a single binding table entry?
458 */
459 assert((size_dwords - offset_dwords) / stride_dwords
460 <= BRW_MAX_NUM_BUFFER_ENTRIES);
461
462 if (size_dwords > offset_dwords + num_vector_components) {
463 /* There is room for at least 1 transform feedback output in the buffer.
464 * Compute the number of additional transform feedback outputs the
465 * buffer has room for.
466 */
467 buffer_size_minus_1 =
468 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
469 } else {
470 /* There isn't even room for a single transform feedback output in the
471 * buffer. We can't configure the binding table entry to prevent output
472 * entirely; we'll have to rely on the geometry shader to detect
473 * overflow. But to minimize the damage in case of a bug, set up the
474 * binding table entry to just allow a single output.
475 */
476 buffer_size_minus_1 = 0;
477 }
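/* Split the entry count (minus one) across Width/Height/Depth the same way
 * as for any buffer surface: bits 6:0, 19:7 and 26:20 respectively.
 */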
478 width = buffer_size_minus_1 & 0x7f;
479 height = (buffer_size_minus_1 & 0xfff80) >> 7;
480 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
481
482 switch (num_vector_components) {
483 case 1:
484 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
485 break;
486 case 2:
487 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
488 break;
489 case 3:
490 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
491 break;
492 case 4:
493 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
494 break;
495 default:
496 unreachable("Invalid vector size for transform feedback output");
497 }
498
499 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
500 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
501 surface_format << BRW_SURFACE_FORMAT_SHIFT |
502 BRW_SURFACE_RC_READ_WRITE;
503 surf[1] = bo->offset64 + offset_bytes; /* reloc */
504 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
505 height << BRW_SURFACE_HEIGHT_SHIFT);
506 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
507 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
508 surf[4] = 0;
509 surf[5] = 0;
510
511 /* Emit relocation to surface contents. */
512 drm_intel_bo_emit_reloc(brw->batch.bo,
513 *out_offset + 4,
514 bo, offset_bytes,
515 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
516 }
517
518 /* Creates a new WM constant buffer reflecting the current fragment program's
519 * constants, if needed by the fragment program.
520 *
521 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
522 * state atom.
523 */
524 static void
525 brw_upload_wm_pull_constants(struct brw_context *brw)
526 {
527 struct brw_stage_state *stage_state = &brw->wm.base;
528 /* BRW_NEW_FRAGMENT_PROGRAM */
529 struct brw_fragment_program *fp =
530 (struct brw_fragment_program *) brw->fragment_program;
531 /* BRW_NEW_FS_PROG_DATA */
532 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
533
534 /* _NEW_PROGRAM_CONSTANTS */
535 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
536 stage_state, prog_data);
537 }
538
539 const struct brw_tracked_state brw_wm_pull_constants = {
540 .dirty = {
541 .mesa = _NEW_PROGRAM_CONSTANTS,
542 .brw = BRW_NEW_BATCH |
543 BRW_NEW_FRAGMENT_PROGRAM |
544 BRW_NEW_FS_PROG_DATA,
545 },
546 .emit = brw_upload_wm_pull_constants,
547 };
548
549 /**
550 * Creates a null renderbuffer surface.
551 *
552 * This is used when the shader doesn't write to any color output. An FB
553 * write to target 0 will still be emitted, because that's how the thread is
554 * terminated (and computed depth is returned), so we need to have the
555 * hardware discard the target 0 color output.
556 */
557 static void
558 brw_emit_null_surface_state(struct brw_context *brw,
559 unsigned width,
560 unsigned height,
561 unsigned samples,
562 uint32_t *out_offset)
563 {
564 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
565 * Notes):
566 *
567 * A null surface will be used in instances where an actual surface is
568 * not bound. When a write message is generated to a null surface, no
569 * actual surface is written to. When a read message (including any
570 * sampling engine message) is generated to a null surface, the result
571 * is all zeros. Note that a null surface type is allowed to be used
572 * with all messages, even if it is not specifically indicated as
573 * supported. All of the remaining fields in surface state are ignored
574 * for null surfaces, with the following exceptions:
575 *
576 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
577 * depth buffer’s corresponding state for all render target surfaces,
578 * including null.
579 *
580 * - Surface Format must be R8G8B8A8_UNORM.
581 */
582 unsigned surface_type = BRW_SURFACE_NULL;
583 drm_intel_bo *bo = NULL;
584 unsigned pitch_minus_1 = 0;
585 uint32_t multisampling_state = 0;
586 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
587 out_offset);
588
589 if (samples > 1) {
590 /* On Gen6, null render targets seem to cause GPU hangs when
591 * multisampling. So work around this problem by rendering into a
592 * dummy color buffer.
593 *
594 * To decrease the amount of memory needed by the workaround buffer, we
595 * set its pitch to 128 bytes (the width of a Y tile). This means that
596 * the amount of memory needed for the workaround buffer is
597 * (width_in_tiles + height_in_tiles - 1) tiles.
598 *
599 * Note that since the workaround buffer will be interpreted by the
600 * hardware as an interleaved multisampled buffer, we need to compute
601 * width_in_tiles and height_in_tiles by dividing the width and height
602 * by 16 rather than the normal Y-tile size of 32.
603 */
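/* For example (hypothetical framebuffer size), a 1920x1080 multisampled
 * framebuffer needs ALIGN(1920, 16) / 16 = 120 plus ALIGN(1080, 16) / 16 = 68
 * tiles, i.e. (120 + 68 - 1) * 4096 bytes, roughly 748 KiB of scratch.
 */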
604 unsigned width_in_tiles = ALIGN(width, 16) / 16;
605 unsigned height_in_tiles = ALIGN(height, 16) / 16;
606 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
607 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
608 size_needed);
609 bo = brw->wm.multisampled_null_render_target_bo;
610 surface_type = BRW_SURFACE_2D;
611 pitch_minus_1 = 127;
612 multisampling_state = brw_get_surface_num_multisamples(samples);
613 }
614
615 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
616 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
617 if (brw->gen < 6) {
618 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
619 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
620 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
621 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
622 }
623 surf[1] = bo ? bo->offset64 : 0;
624 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
625 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
626
627 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
628 * Notes):
629 *
630 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
631 */
632 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
633 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
634 surf[4] = multisampling_state;
635 surf[5] = 0;
636
637 if (bo) {
638 drm_intel_bo_emit_reloc(brw->batch.bo,
639 *out_offset + 4,
640 bo, 0,
641 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
642 }
643 }
644
645 /**
646 * Sets up a surface state structure to point at the given region.
647 * While it is only used for the front/back buffer currently, it should be
648 * usable for further buffers when doing ARB_draw_buffers support.
649 */
650 static uint32_t
651 brw_update_renderbuffer_surface(struct brw_context *brw,
652 struct gl_renderbuffer *rb,
653 bool layered, unsigned unit,
654 uint32_t surf_index)
655 {
656 struct gl_context *ctx = &brw->ctx;
657 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
658 struct intel_mipmap_tree *mt = irb->mt;
659 uint32_t *surf;
660 uint32_t tile_x, tile_y;
661 uint32_t format = 0;
662 uint32_t offset;
663 /* _NEW_BUFFERS */
664 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
665 /* BRW_NEW_FS_PROG_DATA */
666
667 assert(!layered);
668
669 if (rb->TexImage && !brw->has_surface_tile_offset) {
670 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
671
672 if (tile_x != 0 || tile_y != 0) {
673 /* Original gen4 hardware couldn't draw to a non-tile-aligned
674 * destination in a miptree unless you actually set up your renderbuffer
675 * as a miptree and used the fragile lod/array_index/etc. controls to
676 * select the image. So, instead, we just make a new single-level
677 * miptree and render into that.
678 */
679 intel_renderbuffer_move_to_temp(brw, irb, false);
680 mt = irb->mt;
681 }
682 }
683
684 intel_miptree_used_for_rendering(irb->mt);
685
686 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
687
688 format = brw->render_target_format[rb_format];
689 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
690 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
691 __func__, _mesa_get_format_name(rb_format));
692 }
693
694 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
695 format << BRW_SURFACE_FORMAT_SHIFT);
696
697 /* reloc */
698 assert(mt->offset % mt->cpp == 0);
699 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
700 mt->bo->offset64 + mt->offset);
701
702 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
703 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
704
705 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
706 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
707
708 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
709
710 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
711 /* Note that the low bits of these offsets cannot be represented (only
712 * tile_x/4 and tile_y/2 are stored), hence the alignment asserts below.
713 */
714 assert(tile_x % 4 == 0);
715 assert(tile_y % 2 == 0);
716 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
717 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
718 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
719
720 if (brw->gen < 6) {
721 /* _NEW_COLOR */
722 if (!ctx->Color.ColorLogicOpEnabled &&
723 (ctx->Color.BlendEnabled & (1 << unit)))
724 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
725
726 if (!ctx->Color.ColorMask[unit][0])
727 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
728 if (!ctx->Color.ColorMask[unit][1])
729 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
730 if (!ctx->Color.ColorMask[unit][2])
731 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
732
733 /* Disable writes to the alpha component when the renderbuffer is XRGB
734 * (no alpha bits) or when alpha writes are masked off.
735 */
736 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
737 !ctx->Color.ColorMask[unit][3]) {
738 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
739 }
740 }
741
742 drm_intel_bo_emit_reloc(brw->batch.bo,
743 offset + 4,
744 mt->bo,
745 surf[1] - mt->bo->offset64,
746 I915_GEM_DOMAIN_RENDER,
747 I915_GEM_DOMAIN_RENDER);
748
749 return offset;
750 }
751
752 /**
753 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
754 */
755 void
756 brw_update_renderbuffer_surfaces(struct brw_context *brw,
757 const struct gl_framebuffer *fb,
758 uint32_t render_target_start,
759 uint32_t *surf_offset)
760 {
761 GLuint i;
762 const unsigned int w = _mesa_geometric_width(fb);
763 const unsigned int h = _mesa_geometric_height(fb);
764 const unsigned int s = _mesa_geometric_samples(fb);
765
766 /* Update surfaces for drawing buffers */
767 if (fb->_NumColorDrawBuffers >= 1) {
768 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
769 const uint32_t surf_index = render_target_start + i;
770
771 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
772 surf_offset[surf_index] =
773 brw->vtbl.update_renderbuffer_surface(
774 brw, fb->_ColorDrawBuffers[i],
775 _mesa_geometric_layers(fb) > 0, i, surf_index);
776 } else {
777 brw->vtbl.emit_null_surface_state(brw, w, h, s,
778 &surf_offset[surf_index]);
779 }
780 }
781 } else {
782 const uint32_t surf_index = render_target_start;
783 brw->vtbl.emit_null_surface_state(brw, w, h, s,
784 &surf_offset[surf_index]);
785 }
786 }
787
788 static void
789 update_renderbuffer_surfaces(struct brw_context *brw)
790 {
791 const struct gl_context *ctx = &brw->ctx;
792
793 /* _NEW_BUFFERS | _NEW_COLOR */
794 const struct gl_framebuffer *fb = ctx->DrawBuffer;
795 brw_update_renderbuffer_surfaces(
796 brw, fb,
797 brw->wm.prog_data->binding_table.render_target_start,
798 brw->wm.base.surf_offset);
799 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
800 }
801
802 const struct brw_tracked_state brw_renderbuffer_surfaces = {
803 .dirty = {
804 .mesa = _NEW_BUFFERS |
805 _NEW_COLOR,
806 .brw = BRW_NEW_BATCH |
807 BRW_NEW_FS_PROG_DATA,
808 },
809 .emit = update_renderbuffer_surfaces,
810 };
811
812 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
813 .dirty = {
814 .mesa = _NEW_BUFFERS,
815 .brw = BRW_NEW_BATCH,
816 },
817 .emit = update_renderbuffer_surfaces,
818 };
819
820
821 static void
822 update_stage_texture_surfaces(struct brw_context *brw,
823 const struct gl_program *prog,
824 struct brw_stage_state *stage_state,
825 bool for_gather)
826 {
827 if (!prog)
828 return;
829
830 struct gl_context *ctx = &brw->ctx;
831
832 uint32_t *surf_offset = stage_state->surf_offset;
833
834 /* BRW_NEW_*_PROG_DATA */
835 if (for_gather)
836 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
837 else
838 surf_offset += stage_state->prog_data->binding_table.texture_start;
839
840 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
841 for (unsigned s = 0; s < num_samplers; s++) {
842 surf_offset[s] = 0;
843
844 if (prog->SamplersUsed & (1 << s)) {
845 const unsigned unit = prog->SamplerUnits[s];
846
847 /* _NEW_TEXTURE */
848 if (ctx->Texture.Unit[unit]._Current) {
849 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
850 }
851 }
852 }
853 }
854
855
856 /**
857 * Construct SURFACE_STATE objects for enabled textures.
858 */
859 static void
860 brw_update_texture_surfaces(struct brw_context *brw)
861 {
862 /* BRW_NEW_VERTEX_PROGRAM */
863 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
864
865 /* BRW_NEW_TESS_PROGRAMS */
866 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
867 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
868
869 /* BRW_NEW_GEOMETRY_PROGRAM */
870 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
871
872 /* BRW_NEW_FRAGMENT_PROGRAM */
873 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
874
875 /* BRW_NEW_COMPUTE_PROGRAM */
876 struct gl_program *cs = (struct gl_program *) brw->compute_program;
877
878 /* _NEW_TEXTURE */
879 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
880 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false);
881 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false);
882 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
883 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
884 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
885
886 /* Emit an alternate set of surface state for gather. This
887 * allows the surface format to be overridden for only the
888 * gather4 messages. */
889 if (brw->gen < 8) {
890 if (vs && vs->UsesGather)
891 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
892 if (tcs && tcs->UsesGather)
893 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true);
894 if (tes && tes->UsesGather)
895 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true);
896 if (gs && gs->UsesGather)
897 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
898 if (fs && fs->UsesGather)
899 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
900 if (cs && cs->UsesGather)
901 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
902 }
903
904 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
905 }
906
907 const struct brw_tracked_state brw_texture_surfaces = {
908 .dirty = {
909 .mesa = _NEW_TEXTURE,
910 .brw = BRW_NEW_BATCH |
911 BRW_NEW_COMPUTE_PROGRAM |
912 BRW_NEW_FRAGMENT_PROGRAM |
913 BRW_NEW_FS_PROG_DATA |
914 BRW_NEW_GEOMETRY_PROGRAM |
915 BRW_NEW_GS_PROG_DATA |
916 BRW_NEW_TESS_PROGRAMS |
917 BRW_NEW_TCS_PROG_DATA |
918 BRW_NEW_TES_PROG_DATA |
919 BRW_NEW_TEXTURE_BUFFER |
920 BRW_NEW_VERTEX_PROGRAM |
921 BRW_NEW_VS_PROG_DATA,
922 },
923 .emit = brw_update_texture_surfaces,
924 };
925
926 void
927 brw_upload_ubo_surfaces(struct brw_context *brw,
928 struct gl_shader *shader,
929 struct brw_stage_state *stage_state,
930 struct brw_stage_prog_data *prog_data)
931 {
932 struct gl_context *ctx = &brw->ctx;
933
934 if (!shader)
935 return;
936
937 uint32_t *ubo_surf_offsets =
938 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
939
940 for (int i = 0; i < shader->NumUniformBlocks; i++) {
941 struct gl_uniform_buffer_binding *binding =
942 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
943
944 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
945 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
946 } else {
947 struct intel_buffer_object *intel_bo =
948 intel_buffer_object(binding->BufferObject);
949 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
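/* AutomaticSize is set for glBindBufferBase bindings, where the surface
 * should track whatever the buffer currently holds.  For glBindBufferRange
 * bindings we additionally clamp to the user-supplied binding size; the
 * SSBO loop below applies the same rule.
 */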
950 if (!binding->AutomaticSize)
951 size = MIN2(size, binding->Size);
952 drm_intel_bo *bo =
953 intel_bufferobj_buffer(brw, intel_bo,
954 binding->Offset,
955 size);
956 brw_create_constant_surface(brw, bo, binding->Offset,
957 size,
958 &ubo_surf_offsets[i]);
959 }
960 }
961
962 uint32_t *ssbo_surf_offsets =
963 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
964
965 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
966 struct gl_shader_storage_buffer_binding *binding =
967 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
968
969 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
970 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
971 } else {
972 struct intel_buffer_object *intel_bo =
973 intel_buffer_object(binding->BufferObject);
974 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
975 if (!binding->AutomaticSize)
976 size = MIN2(size, binding->Size);
977 drm_intel_bo *bo =
978 intel_bufferobj_buffer(brw, intel_bo,
979 binding->Offset,
980 size);
981 brw_create_buffer_surface(brw, bo, binding->Offset,
982 size,
983 &ssbo_surf_offsets[i]);
984 }
985 }
986
987 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
988 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
989 }
990
991 static void
992 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
993 {
994 struct gl_context *ctx = &brw->ctx;
995 /* _NEW_PROGRAM */
996 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
997
998 if (!prog)
999 return;
1000
1001 /* BRW_NEW_FS_PROG_DATA */
1002 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1003 &brw->wm.base, &brw->wm.prog_data->base);
1004 }
1005
1006 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1007 .dirty = {
1008 .mesa = _NEW_PROGRAM,
1009 .brw = BRW_NEW_BATCH |
1010 BRW_NEW_FS_PROG_DATA |
1011 BRW_NEW_UNIFORM_BUFFER,
1012 },
1013 .emit = brw_upload_wm_ubo_surfaces,
1014 };
1015
1016 static void
1017 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1018 {
1019 struct gl_context *ctx = &brw->ctx;
1020 /* _NEW_PROGRAM */
1021 struct gl_shader_program *prog =
1022 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1023
1024 if (!prog)
1025 return;
1026
1027 /* BRW_NEW_CS_PROG_DATA */
1028 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1029 &brw->cs.base, &brw->cs.prog_data->base);
1030 }
1031
1032 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1033 .dirty = {
1034 .mesa = _NEW_PROGRAM,
1035 .brw = BRW_NEW_BATCH |
1036 BRW_NEW_CS_PROG_DATA |
1037 BRW_NEW_UNIFORM_BUFFER,
1038 },
1039 .emit = brw_upload_cs_ubo_surfaces,
1040 };
1041
1042 void
1043 brw_upload_abo_surfaces(struct brw_context *brw,
1044 struct gl_shader *shader,
1045 struct brw_stage_state *stage_state,
1046 struct brw_stage_prog_data *prog_data)
1047 {
1048 struct gl_context *ctx = &brw->ctx;
1049 uint32_t *surf_offsets =
1050 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1051
1052 if (shader && shader->NumAtomicBuffers) {
1053 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1054 struct gl_atomic_buffer_binding *binding =
1055 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1056 struct intel_buffer_object *intel_bo =
1057 intel_buffer_object(binding->BufferObject);
1058 drm_intel_bo *bo = intel_bufferobj_buffer(
1059 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1060
1061 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1062 binding->Offset, BRW_SURFACEFORMAT_RAW,
1063 bo->size - binding->Offset, 1, true);
1064 }
1065
1066 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1067 }
1068 }
1069
1070 static void
1071 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1072 {
1073 struct gl_context *ctx = &brw->ctx;
1074 /* _NEW_PROGRAM */
1075 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1076
1077 if (prog) {
1078 /* BRW_NEW_FS_PROG_DATA */
1079 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1080 &brw->wm.base, &brw->wm.prog_data->base);
1081 }
1082 }
1083
1084 const struct brw_tracked_state brw_wm_abo_surfaces = {
1085 .dirty = {
1086 .mesa = _NEW_PROGRAM,
1087 .brw = BRW_NEW_ATOMIC_BUFFER |
1088 BRW_NEW_BATCH |
1089 BRW_NEW_FS_PROG_DATA,
1090 },
1091 .emit = brw_upload_wm_abo_surfaces,
1092 };
1093
1094 static void
1095 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1096 {
1097 struct gl_context *ctx = &brw->ctx;
1098 /* _NEW_PROGRAM */
1099 struct gl_shader_program *prog =
1100 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1101
1102 if (prog) {
1103 /* BRW_NEW_CS_PROG_DATA */
1104 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1105 &brw->cs.base, &brw->cs.prog_data->base);
1106 }
1107 }
1108
1109 const struct brw_tracked_state brw_cs_abo_surfaces = {
1110 .dirty = {
1111 .mesa = _NEW_PROGRAM,
1112 .brw = BRW_NEW_ATOMIC_BUFFER |
1113 BRW_NEW_BATCH |
1114 BRW_NEW_CS_PROG_DATA,
1115 },
1116 .emit = brw_upload_cs_abo_surfaces,
1117 };
1118
1119 static void
1120 brw_upload_cs_image_surfaces(struct brw_context *brw)
1121 {
1122 struct gl_context *ctx = &brw->ctx;
1123 /* _NEW_PROGRAM */
1124 struct gl_shader_program *prog =
1125 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1126
1127 if (prog) {
1128 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1129 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1130 &brw->cs.base, &brw->cs.prog_data->base);
1131 }
1132 }
1133
1134 const struct brw_tracked_state brw_cs_image_surfaces = {
1135 .dirty = {
1136 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1137 .brw = BRW_NEW_BATCH |
1138 BRW_NEW_CS_PROG_DATA |
1139 BRW_NEW_IMAGE_UNITS
1140 },
1141 .emit = brw_upload_cs_image_surfaces,
1142 };
1143
1144 static uint32_t
1145 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1146 {
1147 if (access == GL_WRITE_ONLY) {
1148 return brw_format_for_mesa_format(format);
1149 } else {
1150 /* Typed surface reads support a very limited subset of the shader
1151 * image formats. Translate it into the closest format the
1152 * hardware supports.
1153 */
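/* For example, a 16-byte format such as MESA_FORMAT_RGBA_FLOAT32 takes the
 * RAW fallback on Gen8 and earlier, and 8-byte formats additionally do so on
 * Ivybridge-class Gen7 parts (gen == 7 && !is_haswell).
 */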
1154 if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
1155 (_mesa_get_format_bytes(format) >= 8 &&
1156 (brw->gen == 7 && !brw->is_haswell)))
1157 return BRW_SURFACEFORMAT_RAW;
1158 else
1159 return brw_format_for_mesa_format(
1160 brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
1161 }
1162 }
1163
1164 static void
1165 update_default_image_param(struct brw_context *brw,
1166 struct gl_image_unit *u,
1167 unsigned surface_idx,
1168 struct brw_image_param *param)
1169 {
1170 memset(param, 0, sizeof(*param));
1171 param->surface_idx = surface_idx;
1172 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1173 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1174 * detailed explanation of these parameters.
1175 */
1176 param->swizzling[0] = 0xff;
1177 param->swizzling[1] = 0xff;
1178 }
1179
1180 static void
1181 update_buffer_image_param(struct brw_context *brw,
1182 struct gl_image_unit *u,
1183 unsigned surface_idx,
1184 struct brw_image_param *param)
1185 {
1186 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1187
1188 update_default_image_param(brw, u, surface_idx, param);
1189
1190 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1191 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1192 }
1193
1194 static void
1195 update_texture_image_param(struct brw_context *brw,
1196 struct gl_image_unit *u,
1197 unsigned surface_idx,
1198 struct brw_image_param *param)
1199 {
1200 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1201
1202 update_default_image_param(brw, u, surface_idx, param);
1203
1204 param->size[0] = minify(mt->logical_width0, u->Level);
1205 param->size[1] = minify(mt->logical_height0, u->Level);
1206 param->size[2] = (!u->Layered ? 1 :
1207 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1208 u->TexObj->Target == GL_TEXTURE_3D ?
1209 minify(mt->logical_depth0, u->Level) :
1210 mt->logical_depth0);
1211
1212 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1213 &param->offset[0],
1214 &param->offset[1]);
1215
1216 param->stride[0] = mt->cpp;
1217 param->stride[1] = mt->pitch / mt->cpp;
1218 param->stride[2] =
1219 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1220 param->stride[3] =
1221 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1222
1223 if (mt->tiling == I915_TILING_X) {
1224 /* An X tile is a rectangular block of 512x8 bytes. */
1225 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1226 param->tiling[1] = _mesa_logbase2(8);
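/* E.g. with a 4-byte-per-texel format (cpp == 4) this gives
 * tiling[0] = log2(512 / 4) = 7 and tiling[1] = log2(8) = 3,
 * i.e. a 128x8-texel tile footprint.
 */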
1227
1228 if (brw->has_swizzling) {
1229 /* Right shifts required to swizzle bits 9 and 10 of the memory
1230 * address with bit 6.
1231 */
1232 param->swizzling[0] = 3;
1233 param->swizzling[1] = 4;
1234 }
1235 } else if (mt->tiling == I915_TILING_Y) {
1236 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1237 * different from the layout of an X-tiled surface; we simply pretend that
1238 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1239 * one arranged in X-major order just as is the case for X-tiling.
1240 */
1241 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1242 param->tiling[1] = _mesa_logbase2(32);
1243
1244 if (brw->has_swizzling) {
1245 /* Right shift required to swizzle bit 9 of the memory address with
1246 * bit 6.
1247 */
1248 param->swizzling[0] = 3;
1249 }
1250 }
1251
1252 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1253 * address calculation algorithm (emit_address_calculation() in
1254 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1255 * modulus equal to the LOD.
1256 */
1257 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1258 0);
1259 }
1260
1261 static void
1262 update_image_surface(struct brw_context *brw,
1263 struct gl_image_unit *u,
1264 GLenum access,
1265 unsigned surface_idx,
1266 uint32_t *surf_offset,
1267 struct brw_image_param *param)
1268 {
1269 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1270 struct gl_texture_object *obj = u->TexObj;
1271 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1272
1273 if (obj->Target == GL_TEXTURE_BUFFER) {
1274 struct intel_buffer_object *intel_obj =
1275 intel_buffer_object(obj->BufferObject);
1276 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1277 _mesa_get_format_bytes(u->_ActualFormat));
1278
1279 brw->vtbl.emit_buffer_surface_state(
1280 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1281 format, intel_obj->Base.Size / texel_size, texel_size,
1282 access != GL_READ_ONLY);
1283
1284 update_buffer_image_param(brw, u, surface_idx, param);
1285
1286 } else {
1287 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1288 struct intel_mipmap_tree *mt = intel_obj->mt;
1289
1290 if (format == BRW_SURFACEFORMAT_RAW) {
1291 brw->vtbl.emit_buffer_surface_state(
1292 brw, surf_offset, mt->bo, mt->offset,
1293 format, mt->bo->size - mt->offset, 1 /* pitch */,
1294 access != GL_READ_ONLY);
1295
1296 } else {
1297 const unsigned min_layer = obj->MinLayer + u->_Layer;
1298 const unsigned min_level = obj->MinLevel + u->Level;
1299 const unsigned num_layers = (!u->Layered ? 1 :
1300 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1301 mt->logical_depth0);
1302 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1303 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1304 GL_TEXTURE_2D_ARRAY : obj->Target);
1305
1306 brw->vtbl.emit_texture_surface_state(
1307 brw, mt, target,
1308 min_layer, min_layer + num_layers,
1309 min_level, min_level + 1,
1310 format, SWIZZLE_XYZW,
1311 surf_offset, access != GL_READ_ONLY, false);
1312 }
1313
1314 update_texture_image_param(brw, u, surface_idx, param);
1315 }
1316
1317 } else {
1318 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1319 update_default_image_param(brw, u, surface_idx, param);
1320 }
1321 }
1322
1323 void
1324 brw_upload_image_surfaces(struct brw_context *brw,
1325 struct gl_shader *shader,
1326 struct brw_stage_state *stage_state,
1327 struct brw_stage_prog_data *prog_data)
1328 {
1329 struct gl_context *ctx = &brw->ctx;
1330
1331 if (shader && shader->NumImages) {
1332 for (unsigned i = 0; i < shader->NumImages; i++) {
1333 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1334 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1335
1336 update_image_surface(brw, u, shader->ImageAccess[i],
1337 surf_idx,
1338 &stage_state->surf_offset[surf_idx],
1339 &prog_data->image_param[i]);
1340 }
1341
1342 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1343 }
1344 }
1345
1346 static void
1347 brw_upload_wm_image_surfaces(struct brw_context *brw)
1348 {
1349 struct gl_context *ctx = &brw->ctx;
1350 /* BRW_NEW_FRAGMENT_PROGRAM */
1351 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1352
1353 if (prog) {
1354 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1355 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1356 &brw->wm.base, &brw->wm.prog_data->base);
1357 }
1358 }
1359
1360 const struct brw_tracked_state brw_wm_image_surfaces = {
1361 .dirty = {
1362 .mesa = _NEW_TEXTURE,
1363 .brw = BRW_NEW_BATCH |
1364 BRW_NEW_FRAGMENT_PROGRAM |
1365 BRW_NEW_FS_PROG_DATA |
1366 BRW_NEW_IMAGE_UNITS
1367 },
1368 .emit = brw_upload_wm_image_surfaces,
1369 };
1370
1371 void
1372 gen4_init_vtable_surface_functions(struct brw_context *brw)
1373 {
1374 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1375 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1376 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1377 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1378 }
1379
1380 static void
1381 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1382 {
1383 struct gl_context *ctx = &brw->ctx;
1384 /* _NEW_PROGRAM */
1385 struct gl_shader_program *prog =
1386 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1387
1388 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1389 const unsigned surf_idx =
1390 brw->cs.prog_data->binding_table.work_groups_start;
1391 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1392 drm_intel_bo *bo;
1393 uint32_t bo_offset;
1394
1395 if (brw->compute.num_work_groups_bo == NULL) {
1396 bo = NULL;
1397 intel_upload_data(brw,
1398 (void *)brw->compute.num_work_groups,
1399 3 * sizeof(GLuint),
1400 sizeof(GLuint),
1401 &bo,
1402 &bo_offset);
1403 } else {
1404 bo = brw->compute.num_work_groups_bo;
1405 bo_offset = brw->compute.num_work_groups_offset;
1406 }
1407
1408 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1409 bo, bo_offset,
1410 BRW_SURFACEFORMAT_RAW,
1411 3 * sizeof(GLuint), 1, true);
1412 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1413 }
1414 }
1415
1416 const struct brw_tracked_state brw_cs_work_groups_surface = {
1417 .dirty = {
1418 .brw = BRW_NEW_CS_WORK_GROUPS
1419 },
1420 .emit = brw_upload_cs_work_groups_surface,
1421 };