Revert "i965/wm: use proper API buffer size for the surfaces."
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "intel_mipmap_tree.h"
43 #include "intel_batchbuffer.h"
44 #include "intel_tex.h"
45 #include "intel_fbo.h"
46 #include "intel_buffer_objects.h"
47
48 #include "brw_context.h"
49 #include "brw_state.h"
50 #include "brw_defines.h"
51 #include "brw_wm.h"
52
53 GLuint
54 translate_tex_target(GLenum target)
55 {
56 switch (target) {
57 case GL_TEXTURE_1D:
58 case GL_TEXTURE_1D_ARRAY_EXT:
59 return BRW_SURFACE_1D;
60
61 case GL_TEXTURE_RECTANGLE_NV:
62 return BRW_SURFACE_2D;
63
64 case GL_TEXTURE_2D:
65 case GL_TEXTURE_2D_ARRAY_EXT:
66 case GL_TEXTURE_EXTERNAL_OES:
67 case GL_TEXTURE_2D_MULTISAMPLE:
68 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
69 return BRW_SURFACE_2D;
70
71 case GL_TEXTURE_3D:
72 return BRW_SURFACE_3D;
73
74 case GL_TEXTURE_CUBE_MAP:
75 case GL_TEXTURE_CUBE_MAP_ARRAY:
76 return BRW_SURFACE_CUBE;
77
78 default:
79 unreachable("not reached");
80 }
81 }
82
83 uint32_t
84 brw_get_surface_tiling_bits(uint32_t tiling)
85 {
86 switch (tiling) {
87 case I915_TILING_X:
88 return BRW_SURFACE_TILED;
89 case I915_TILING_Y:
90 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
91 default:
92 return 0;
93 }
94 }
95
96
97 uint32_t
98 brw_get_surface_num_multisamples(unsigned num_samples)
99 {
100 if (num_samples > 1)
101 return BRW_SURFACE_MULTISAMPLECOUNT_4;
102 else
103 return BRW_SURFACE_MULTISAMPLECOUNT_1;
104 }
105
106 void
107 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
108 bool is_render_target,
109 unsigned *width, unsigned *height,
110 unsigned *pitch, uint32_t *tiling, unsigned *format)
111 {
112 static const unsigned halign_stencil = 8;
113
114       /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
115        * there are half as many rows. In addition, mip-levels are accessed
116        * manually by the program, and therefore the surface is set up to cover
117        * all the mip-levels for one slice.
118        * (Hardware is still used to access individual slices.)
119        */
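      /* A worked example with illustrative numbers (not taken from any real
       * miptree): if mt->pitch = 64, mt->total_width = 64, mt->total_height = 128
       * and mt->physical_depth0 = 1, the code below produces pitch = 128,
       * width = ALIGN(64, 8) * 2 = 128 and height = (128 / 1) / 2 = 64, i.e. the
       * same storage viewed as a Y-tiled surface with twice the width and half
       * the rows.
       */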
120 *tiling = I915_TILING_Y;
121 *pitch = mt->pitch * 2;
122 *width = ALIGN(mt->total_width, halign_stencil) * 2;
123 *height = (mt->total_height / mt->physical_depth0) / 2;
124
125 if (is_render_target) {
126 *format = BRW_SURFACEFORMAT_R8_UINT;
127 }
128 }
129
130
131 /**
132 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
133 * swizzling.
134 */
135 int
136 brw_get_texture_swizzle(const struct gl_context *ctx,
137 const struct gl_texture_object *t)
138 {
139 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
140
141 int swizzles[SWIZZLE_NIL + 1] = {
142 SWIZZLE_X,
143 SWIZZLE_Y,
144 SWIZZLE_Z,
145 SWIZZLE_W,
146 SWIZZLE_ZERO,
147 SWIZZLE_ONE,
148 SWIZZLE_NIL
149 };
150
151 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
152 img->_BaseFormat == GL_DEPTH_STENCIL) {
153 GLenum depth_mode = t->DepthMode;
154
155 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
156 * with depth component data specified with a sized internal format.
157 * Otherwise, it's left at the old default, GL_LUMINANCE.
158 */
159 if (_mesa_is_gles3(ctx) &&
160 img->InternalFormat != GL_DEPTH_COMPONENT &&
161 img->InternalFormat != GL_DEPTH_STENCIL) {
162 depth_mode = GL_RED;
163 }
164
165 switch (depth_mode) {
166 case GL_ALPHA:
167 swizzles[0] = SWIZZLE_ZERO;
168 swizzles[1] = SWIZZLE_ZERO;
169 swizzles[2] = SWIZZLE_ZERO;
170 swizzles[3] = SWIZZLE_X;
171 break;
172 case GL_LUMINANCE:
173 swizzles[0] = SWIZZLE_X;
174 swizzles[1] = SWIZZLE_X;
175 swizzles[2] = SWIZZLE_X;
176 swizzles[3] = SWIZZLE_ONE;
177 break;
178 case GL_INTENSITY:
179 swizzles[0] = SWIZZLE_X;
180 swizzles[1] = SWIZZLE_X;
181 swizzles[2] = SWIZZLE_X;
182 swizzles[3] = SWIZZLE_X;
183 break;
184 case GL_RED:
185 swizzles[0] = SWIZZLE_X;
186 swizzles[1] = SWIZZLE_ZERO;
187 swizzles[2] = SWIZZLE_ZERO;
188 swizzles[3] = SWIZZLE_ONE;
189 break;
190 }
191 }
192
193 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
194
195 /* If the texture's format is alpha-only, force R, G, and B to
196 * 0.0. Similarly, if the texture's format has no alpha channel,
197 * force the alpha value read to 1.0. This allows for the
198 * implementation to use an RGBA texture for any of these formats
199 * without leaking any unexpected values.
200 */
201 switch (img->_BaseFormat) {
202 case GL_ALPHA:
203 swizzles[0] = SWIZZLE_ZERO;
204 swizzles[1] = SWIZZLE_ZERO;
205 swizzles[2] = SWIZZLE_ZERO;
206 break;
207 case GL_LUMINANCE:
208 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
209 swizzles[0] = SWIZZLE_X;
210 swizzles[1] = SWIZZLE_X;
211 swizzles[2] = SWIZZLE_X;
212 swizzles[3] = SWIZZLE_ONE;
213 }
214 break;
215 case GL_LUMINANCE_ALPHA:
216 if (datatype == GL_SIGNED_NORMALIZED) {
217 swizzles[0] = SWIZZLE_X;
218 swizzles[1] = SWIZZLE_X;
219 swizzles[2] = SWIZZLE_X;
220 swizzles[3] = SWIZZLE_W;
221 }
222 break;
223 case GL_INTENSITY:
224 if (datatype == GL_SIGNED_NORMALIZED) {
225 swizzles[0] = SWIZZLE_X;
226 swizzles[1] = SWIZZLE_X;
227 swizzles[2] = SWIZZLE_X;
228 swizzles[3] = SWIZZLE_X;
229 }
230 break;
231 case GL_RED:
232 case GL_RG:
233 case GL_RGB:
234 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
235 swizzles[3] = SWIZZLE_ONE;
236 break;
237 }
238
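      /* The hardware swizzle routes each component of the user swizzle
       * (t->_Swizzle) through the table built above. Illustrative example: with
       * swizzles = {X, ZERO, ZERO, ONE} (GL_RED depth mode) and an identity user
       * swizzle, the result below is MAKE_SWIZZLE4(X, ZERO, ZERO, ONE).
       */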
239 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
240 swizzles[GET_SWZ(t->_Swizzle, 1)],
241 swizzles[GET_SWZ(t->_Swizzle, 2)],
242 swizzles[GET_SWZ(t->_Swizzle, 3)]);
243 }
244
245 static void
246 gen4_emit_buffer_surface_state(struct brw_context *brw,
247 uint32_t *out_offset,
248 drm_intel_bo *bo,
249 unsigned buffer_offset,
250 unsigned surface_format,
251 unsigned buffer_size,
252 unsigned pitch,
253 bool rw)
254 {
255 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
256 6 * 4, 32, out_offset);
257 memset(surf, 0, 6 * 4);
258
259 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
260 surface_format << BRW_SURFACE_FORMAT_SHIFT |
261 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
262 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
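      /* The width, height and depth fields below jointly hold buffer_size as a
       * 27-bit value: bits 6:0 in width, bits 19:7 in height and bits 26:20 in
       * depth. Illustrative example: buffer_size = 0x12345 encodes as width 0x45,
       * height 0x246 and depth 0.
       */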
263 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
264 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
265 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
266 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
267
268 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
269 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
270 * physical cache. It is mapped in hardware to the sampler cache."
271 */
272 if (bo) {
273 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
274 bo, buffer_offset,
275 I915_GEM_DOMAIN_SAMPLER,
276 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
277 }
278 }
279
280 void
281 brw_update_buffer_texture_surface(struct gl_context *ctx,
282 unsigned unit,
283 uint32_t *surf_offset)
284 {
285 struct brw_context *brw = brw_context(ctx);
286 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
287 struct intel_buffer_object *intel_obj =
288 intel_buffer_object(tObj->BufferObject);
289 uint32_t size = tObj->BufferSize;
290 drm_intel_bo *bo = NULL;
291 mesa_format format = tObj->_BufferObjectFormat;
292 uint32_t brw_format = brw_format_for_mesa_format(format);
293 int texel_size = _mesa_get_format_bytes(format);
294
295 if (intel_obj) {
296 size = MIN2(size, intel_obj->Base.Size);
297 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
298 }
299
300 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
301 _mesa_problem(NULL, "bad format %s for texture buffer\n",
302 _mesa_get_format_name(format));
303 }
304
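      /* The surface below is sized in texels of the translated format:
       * size / texel_size elements, with texel_size bytes per element passed as
       * the pitch.
       */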
305 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
306 tObj->BufferOffset,
307 brw_format,
308 size / texel_size,
309 texel_size,
310 false /* rw */);
311 }
312
313 static void
314 brw_update_texture_surface(struct gl_context *ctx,
315 unsigned unit,
316 uint32_t *surf_offset,
317 bool for_gather)
318 {
319 struct brw_context *brw = brw_context(ctx);
320 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
321 struct intel_texture_object *intelObj = intel_texture_object(tObj);
322 struct intel_mipmap_tree *mt = intelObj->mt;
323 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
324 uint32_t *surf;
325
326 /* BRW_NEW_TEXTURE_BUFFER */
327 if (tObj->Target == GL_TEXTURE_BUFFER) {
328 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
329 return;
330 }
331
332 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
333 6 * 4, 32, surf_offset);
334
335 uint32_t tex_format = translate_tex_format(brw, mt->format,
336 sampler->sRGBDecode);
337
338 if (for_gather) {
339 /* Sandybridge's gather4 message is broken for integer formats.
340 * To work around this, we pretend the surface is UNORM for
341 * 8 or 16-bit formats, and emit shader instructions to recover
342 * the real INT/UINT value. For 32-bit formats, we pretend
343 * the surface is FLOAT, and simply reinterpret the resulting
344 * bits.
345 */
346 switch (tex_format) {
347 case BRW_SURFACEFORMAT_R8_SINT:
348 case BRW_SURFACEFORMAT_R8_UINT:
349 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
350 break;
351
352 case BRW_SURFACEFORMAT_R16_SINT:
353 case BRW_SURFACEFORMAT_R16_UINT:
354 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
355 break;
356
357 case BRW_SURFACEFORMAT_R32_SINT:
358 case BRW_SURFACEFORMAT_R32_UINT:
359 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
360 break;
361
362 default:
363 break;
364 }
365 }
366
367 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
368 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
369 BRW_SURFACE_CUBEFACE_ENABLES |
370 tex_format << BRW_SURFACE_FORMAT_SHIFT);
371
372 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
373
374 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
375 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
376 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
377
378 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
379 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
380 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
381
382 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
383 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
384
385 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
386
387 /* Emit relocation to surface contents */
388 drm_intel_bo_emit_reloc(brw->batch.bo,
389 *surf_offset + 4,
390 mt->bo,
391 surf[1] - mt->bo->offset64,
392 I915_GEM_DOMAIN_SAMPLER, 0);
393 }
394
395 /**
396 * Create the constant buffer surface. Vertex/fragment shader constants will be
397 * read from this buffer with Data Port Read instructions/messages.
398 */
399 void
400 brw_create_constant_surface(struct brw_context *brw,
401 drm_intel_bo *bo,
402 uint32_t offset,
403 uint32_t size,
404 uint32_t *out_offset)
405 {
406 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
407 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
408 size, 1, false);
409 }
410
411 /**
412 * Create the buffer surface. Shader buffer variables will be
413  * read from / written to this buffer with Data Port Read/Write
414 * instructions/messages.
415 */
416 void
417 brw_create_buffer_surface(struct brw_context *brw,
418 drm_intel_bo *bo,
419 uint32_t offset,
420 uint32_t size,
421 uint32_t *out_offset)
422 {
423 /* Use a raw surface so we can reuse existing untyped read/write/atomic
424  * messages. We need these messages specifically for the fragment shader,
425  * since they include a pixel mask header that lets us ensure correct
426  * behavior for helper invocations, which must not write to the buffer.
427 */
428 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
429 BRW_SURFACEFORMAT_RAW,
430 size, 1, true);
431 }
432
433 /**
434 * Set up a binding table entry for use by stream output logic (transform
435 * feedback).
436 *
437 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
438 */
439 void
440 brw_update_sol_surface(struct brw_context *brw,
441 struct gl_buffer_object *buffer_obj,
442 uint32_t *out_offset, unsigned num_vector_components,
443 unsigned stride_dwords, unsigned offset_dwords)
444 {
445 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
446 uint32_t offset_bytes = 4 * offset_dwords;
447 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
448 offset_bytes,
449 buffer_obj->Size - offset_bytes);
450 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
451 out_offset);
452 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
453 size_t size_dwords = buffer_obj->Size / 4;
454 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
455
456 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
457 * too big to map using a single binding table entry?
458 */
459 assert((size_dwords - offset_dwords) / stride_dwords
460 <= BRW_MAX_NUM_BUFFER_ENTRIES);
461
462 if (size_dwords > offset_dwords + num_vector_components) {
463 /* There is room for at least 1 transform feedback output in the buffer.
464 * Compute the number of additional transform feedback outputs the
465 * buffer has room for.
466 */
467 buffer_size_minus_1 =
468 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
469 } else {
470 /* There isn't even room for a single transform feedback output in the
471 * buffer. We can't configure the binding table entry to prevent output
472 * entirely; we'll have to rely on the geometry shader to detect
473 * overflow. But to minimize the damage in case of a bug, set up the
474 * binding table entry to just allow a single output.
475 */
476 buffer_size_minus_1 = 0;
477 }
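      /* buffer_size_minus_1 counts how many additional transform feedback
       * outputs (in units of stride_dwords) fit in the buffer, and is split
       * across the width, height and depth fields of the surface: bits 6:0,
       * 19:7 and 26:20 respectively. Illustrative example:
       * buffer_size_minus_1 = 1000 gives width = 104, height = 7 and depth = 0.
       */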
478 width = buffer_size_minus_1 & 0x7f;
479 height = (buffer_size_minus_1 & 0xfff80) >> 7;
480 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
481
482 switch (num_vector_components) {
483 case 1:
484 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
485 break;
486 case 2:
487 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
488 break;
489 case 3:
490 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
491 break;
492 case 4:
493 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
494 break;
495 default:
496 unreachable("Invalid vector size for transform feedback output");
497 }
498
499 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
500 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
501 surface_format << BRW_SURFACE_FORMAT_SHIFT |
502 BRW_SURFACE_RC_READ_WRITE;
503 surf[1] = bo->offset64 + offset_bytes; /* reloc */
504 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
505 height << BRW_SURFACE_HEIGHT_SHIFT);
506 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
507 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
508 surf[4] = 0;
509 surf[5] = 0;
510
511 /* Emit relocation to surface contents. */
512 drm_intel_bo_emit_reloc(brw->batch.bo,
513 *out_offset + 4,
514 bo, offset_bytes,
515 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
516 }
517
518 /* Creates a new WM constant buffer reflecting the current fragment program's
519 * constants, if needed by the fragment program.
520 *
521 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
522 * state atom.
523 */
524 static void
525 brw_upload_wm_pull_constants(struct brw_context *brw)
526 {
527 struct brw_stage_state *stage_state = &brw->wm.base;
528 /* BRW_NEW_FRAGMENT_PROGRAM */
529 struct brw_fragment_program *fp =
530 (struct brw_fragment_program *) brw->fragment_program;
531 /* BRW_NEW_FS_PROG_DATA */
532 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
533
534 /* _NEW_PROGRAM_CONSTANTS */
535 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
536 stage_state, prog_data);
537 }
538
539 const struct brw_tracked_state brw_wm_pull_constants = {
540 .dirty = {
541 .mesa = _NEW_PROGRAM_CONSTANTS,
542 .brw = BRW_NEW_BATCH |
543 BRW_NEW_FRAGMENT_PROGRAM |
544 BRW_NEW_FS_PROG_DATA,
545 },
546 .emit = brw_upload_wm_pull_constants,
547 };
548
549 /**
550 * Creates a null renderbuffer surface.
551 *
552 * This is used when the shader doesn't write to any color output. An FB
553 * write to target 0 will still be emitted, because that's how the thread is
554 * terminated (and computed depth is returned), so we need to have the
555  * hardware discard the target 0 color output.
556 */
557 static void
558 brw_emit_null_surface_state(struct brw_context *brw,
559 unsigned width,
560 unsigned height,
561 unsigned samples,
562 uint32_t *out_offset)
563 {
564 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
565 * Notes):
566 *
567 * A null surface will be used in instances where an actual surface is
568 * not bound. When a write message is generated to a null surface, no
569 * actual surface is written to. When a read message (including any
570 * sampling engine message) is generated to a null surface, the result
571 * is all zeros. Note that a null surface type is allowed to be used
572  * with all messages, even if it is not specifically indicated as
573 * supported. All of the remaining fields in surface state are ignored
574 * for null surfaces, with the following exceptions:
575 *
576  *   - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
577  *     depth buffer’s corresponding state for all render target surfaces,
578  *     including null.
579  *
580  *   - Surface Format must be R8G8B8A8_UNORM.
581 */
582 unsigned surface_type = BRW_SURFACE_NULL;
583 drm_intel_bo *bo = NULL;
584 unsigned pitch_minus_1 = 0;
585 uint32_t multisampling_state = 0;
586 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
587 out_offset);
588
589 if (samples > 1) {
590 /* On Gen6, null render targets seem to cause GPU hangs when
591  * multisampling. So work around this problem by rendering into a dummy
592  * color buffer.
593  *
594  * To decrease the amount of memory needed by the workaround buffer, we
595  * set its pitch to 128 bytes (the width of a Y tile). This means that
596  * the amount of memory needed for the workaround buffer is
597  * (width_in_tiles + height_in_tiles - 1) tiles.
598  *
599  * Note that since the workaround buffer will be interpreted by the
600  * hardware as an interleaved multisampled buffer, we need to compute
601  * width_in_tiles and height_in_tiles by dividing the width and height
602  * by 16 rather than the normal Y-tile size of 32.
603 */
604 unsigned width_in_tiles = ALIGN(width, 16) / 16;
605 unsigned height_in_tiles = ALIGN(height, 16) / 16;
606 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
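      /* Illustrative example: for a 1920x1080 multisampled target this gives
       * width_in_tiles = 120 and height_in_tiles = 68, so size_needed =
       * 187 * 4096 bytes (roughly 750 KiB) rather than a full-size dummy buffer.
       */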
607 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
608 size_needed);
609 bo = brw->wm.multisampled_null_render_target_bo;
610 surface_type = BRW_SURFACE_2D;
611 pitch_minus_1 = 127;
612 multisampling_state = brw_get_surface_num_multisamples(samples);
613 }
614
615 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
616 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
617 if (brw->gen < 6) {
618 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
619 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
620 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
621 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
622 }
623 surf[1] = bo ? bo->offset64 : 0;
624 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
625 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
626
627 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
628 * Notes):
629 *
630 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
631 */
632 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
633 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
634 surf[4] = multisampling_state;
635 surf[5] = 0;
636
637 if (bo) {
638 drm_intel_bo_emit_reloc(brw->batch.bo,
639 *out_offset + 4,
640 bo, 0,
641 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
642 }
643 }
644
645 /**
646 * Sets up a surface state structure to point at the given region.
647  * While it is currently only used for the front/back buffer, it should be
648  * usable for other buffers when doing ARB_draw_buffers support.
649 */
650 static uint32_t
651 brw_update_renderbuffer_surface(struct brw_context *brw,
652 struct gl_renderbuffer *rb,
653 bool layered, unsigned unit,
654 uint32_t surf_index)
655 {
656 struct gl_context *ctx = &brw->ctx;
657 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
658 struct intel_mipmap_tree *mt = irb->mt;
659 uint32_t *surf;
660 uint32_t tile_x, tile_y;
661 uint32_t format = 0;
662 uint32_t offset;
663 /* _NEW_BUFFERS */
664 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
665 /* BRW_NEW_FS_PROG_DATA */
666
667 assert(!layered);
668
669 if (rb->TexImage && !brw->has_surface_tile_offset) {
670 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
671
672 if (tile_x != 0 || tile_y != 0) {
673 /* Original gen4 hardware couldn't draw to a non-tile-aligned
674  * destination in a miptree unless you actually set up your renderbuffer
675 * as a miptree and used the fragile lod/array_index/etc. controls to
676 * select the image. So, instead, we just make a new single-level
677 * miptree and render into that.
678 */
679 intel_renderbuffer_move_to_temp(brw, irb, false);
680 mt = irb->mt;
681 }
682 }
683
684 intel_miptree_used_for_rendering(irb->mt);
685
686 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
687
688 format = brw->render_target_format[rb_format];
689 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
690 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
691 __func__, _mesa_get_format_name(rb_format));
692 }
693
694 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
695 format << BRW_SURFACE_FORMAT_SHIFT);
696
697 /* reloc */
698 assert(mt->offset % mt->cpp == 0);
699 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
700 mt->bo->offset64 + mt->offset);
701
702 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
703 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
704
705 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
706 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
707
708 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
709
710 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
711 /* Note that the low bits of these fields are missing, so
712  * there's the possibility of getting into trouble.
713 */
714 assert(tile_x % 4 == 0);
715 assert(tile_y % 2 == 0);
716 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
717 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
718 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
719
720 if (brw->gen < 6) {
721 /* _NEW_COLOR */
722 if (!ctx->Color.ColorLogicOpEnabled &&
723 (ctx->Color.BlendEnabled & (1 << unit)))
724 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
725
726 if (!ctx->Color.ColorMask[unit][0])
727 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
728 if (!ctx->Color.ColorMask[unit][1])
729 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
730 if (!ctx->Color.ColorMask[unit][2])
731 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
732
733  /* Disable writes to the alpha component when the renderbuffer is XRGB
734  * (the visual has no alpha bits) or alpha writes are masked off.
735  */
736 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
737 !ctx->Color.ColorMask[unit][3]) {
738 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
739 }
740 }
741
742 drm_intel_bo_emit_reloc(brw->batch.bo,
743 offset + 4,
744 mt->bo,
745 surf[1] - mt->bo->offset64,
746 I915_GEM_DOMAIN_RENDER,
747 I915_GEM_DOMAIN_RENDER);
748
749 return offset;
750 }
751
752 /**
753 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
754 */
755 void
756 brw_update_renderbuffer_surfaces(struct brw_context *brw,
757 const struct gl_framebuffer *fb,
758 uint32_t render_target_start,
759 uint32_t *surf_offset)
760 {
761 GLuint i;
762 const unsigned int w = _mesa_geometric_width(fb);
763 const unsigned int h = _mesa_geometric_height(fb);
764 const unsigned int s = _mesa_geometric_samples(fb);
765
766 /* Update surfaces for drawing buffers */
767 if (fb->_NumColorDrawBuffers >= 1) {
768 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
769 const uint32_t surf_index = render_target_start + i;
770
771 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
772 surf_offset[surf_index] =
773 brw->vtbl.update_renderbuffer_surface(
774 brw, fb->_ColorDrawBuffers[i],
775 _mesa_geometric_layers(fb) > 0, i, surf_index);
776 } else {
777 brw->vtbl.emit_null_surface_state(brw, w, h, s,
778 &surf_offset[surf_index]);
779 }
780 }
781 } else {
782 const uint32_t surf_index = render_target_start;
783 brw->vtbl.emit_null_surface_state(brw, w, h, s,
784 &surf_offset[surf_index]);
785 }
786 }
787
788 static void
789 update_renderbuffer_surfaces(struct brw_context *brw)
790 {
791 const struct gl_context *ctx = &brw->ctx;
792
793 /* _NEW_BUFFERS | _NEW_COLOR */
794 const struct gl_framebuffer *fb = ctx->DrawBuffer;
795 brw_update_renderbuffer_surfaces(
796 brw, fb,
797 brw->wm.prog_data->binding_table.render_target_start,
798 brw->wm.base.surf_offset);
799 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
800 }
801
802 const struct brw_tracked_state brw_renderbuffer_surfaces = {
803 .dirty = {
804 .mesa = _NEW_BUFFERS |
805 _NEW_COLOR,
806 .brw = BRW_NEW_BATCH |
807 BRW_NEW_FS_PROG_DATA,
808 },
809 .emit = update_renderbuffer_surfaces,
810 };
811
812 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
813 .dirty = {
814 .mesa = _NEW_BUFFERS,
815 .brw = BRW_NEW_BATCH,
816 },
817 .emit = update_renderbuffer_surfaces,
818 };
819
820
821 static void
822 update_stage_texture_surfaces(struct brw_context *brw,
823 const struct gl_program *prog,
824 struct brw_stage_state *stage_state,
825 bool for_gather)
826 {
827 if (!prog)
828 return;
829
830 struct gl_context *ctx = &brw->ctx;
831
832 uint32_t *surf_offset = stage_state->surf_offset;
833
834 /* BRW_NEW_*_PROG_DATA */
835 if (for_gather)
836 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
837 else
838 surf_offset += stage_state->prog_data->binding_table.texture_start;
839
840 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
841 for (unsigned s = 0; s < num_samplers; s++) {
842 surf_offset[s] = 0;
843
844 if (prog->SamplersUsed & (1 << s)) {
845 const unsigned unit = prog->SamplerUnits[s];
846
847 /* _NEW_TEXTURE */
848 if (ctx->Texture.Unit[unit]._Current) {
849 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
850 }
851 }
852 }
853 }
854
855
856 /**
857 * Construct SURFACE_STATE objects for enabled textures.
858 */
859 static void
860 brw_update_texture_surfaces(struct brw_context *brw)
861 {
862 /* BRW_NEW_VERTEX_PROGRAM */
863 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
864
865 /* BRW_NEW_TESS_PROGRAMS */
866 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
867 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
868
869 /* BRW_NEW_GEOMETRY_PROGRAM */
870 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
871
872 /* BRW_NEW_FRAGMENT_PROGRAM */
873 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
874
875 /* BRW_NEW_COMPUTE_PROGRAM */
876 struct gl_program *cs = (struct gl_program *) brw->compute_program;
877
878 /* _NEW_TEXTURE */
879 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
880 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false);
881 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false);
882 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
883 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
884 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
885
886  /* Emit an alternate set of surface state for gather. This
887  * allows the surface format to be overridden for only the
888  * gather4 messages. */
889 if (brw->gen < 8) {
890 if (vs && vs->UsesGather)
891 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
892 if (tcs && tcs->UsesGather)
893 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true);
894 if (tes && tes->UsesGather)
895 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true);
896 if (gs && gs->UsesGather)
897 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
898 if (fs && fs->UsesGather)
899 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
900 if (cs && cs->UsesGather)
901 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
902 }
903
904 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
905 }
906
907 const struct brw_tracked_state brw_texture_surfaces = {
908 .dirty = {
909 .mesa = _NEW_TEXTURE,
910 .brw = BRW_NEW_BATCH |
911 BRW_NEW_COMPUTE_PROGRAM |
912 BRW_NEW_FRAGMENT_PROGRAM |
913 BRW_NEW_FS_PROG_DATA |
914 BRW_NEW_GEOMETRY_PROGRAM |
915 BRW_NEW_GS_PROG_DATA |
916 BRW_NEW_TESS_PROGRAMS |
917 BRW_NEW_TCS_PROG_DATA |
918 BRW_NEW_TES_PROG_DATA |
919 BRW_NEW_TEXTURE_BUFFER |
920 BRW_NEW_VERTEX_PROGRAM |
921 BRW_NEW_VS_PROG_DATA,
922 },
923 .emit = brw_update_texture_surfaces,
924 };
925
926 void
927 brw_upload_ubo_surfaces(struct brw_context *brw,
928 struct gl_shader *shader,
929 struct brw_stage_state *stage_state,
930 struct brw_stage_prog_data *prog_data)
931 {
932 struct gl_context *ctx = &brw->ctx;
933
934 if (!shader)
935 return;
936
937 uint32_t *ubo_surf_offsets =
938 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
939
940 for (int i = 0; i < shader->NumUniformBlocks; i++) {
941 struct gl_uniform_buffer_binding *binding =
942 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
943
944 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
945 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
946 } else {
947 struct intel_buffer_object *intel_bo =
948 intel_buffer_object(binding->BufferObject);
949 drm_intel_bo *bo =
950 intel_bufferobj_buffer(brw, intel_bo,
951 binding->Offset,
952 binding->BufferObject->Size - binding->Offset);
953 brw_create_constant_surface(brw, bo, binding->Offset,
954 binding->BufferObject->Size - binding->Offset,
955 &ubo_surf_offsets[i]);
956 }
957 }
958
959 uint32_t *ssbo_surf_offsets =
960 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
961
962 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
963 struct gl_shader_storage_buffer_binding *binding =
964 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
965
966 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
967 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
968 } else {
969 struct intel_buffer_object *intel_bo =
970 intel_buffer_object(binding->BufferObject);
971 drm_intel_bo *bo =
972 intel_bufferobj_buffer(brw, intel_bo,
973 binding->Offset,
974 binding->BufferObject->Size - binding->Offset);
975 brw_create_buffer_surface(brw, bo, binding->Offset,
976 binding->BufferObject->Size - binding->Offset,
977 &ssbo_surf_offsets[i]);
978 }
979 }
980
981 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
982 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
983 }
984
985 static void
986 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
987 {
988 struct gl_context *ctx = &brw->ctx;
989 /* _NEW_PROGRAM */
990 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
991
992 if (!prog)
993 return;
994
995 /* BRW_NEW_FS_PROG_DATA */
996 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
997 &brw->wm.base, &brw->wm.prog_data->base);
998 }
999
1000 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1001 .dirty = {
1002 .mesa = _NEW_PROGRAM,
1003 .brw = BRW_NEW_BATCH |
1004 BRW_NEW_FS_PROG_DATA |
1005 BRW_NEW_UNIFORM_BUFFER,
1006 },
1007 .emit = brw_upload_wm_ubo_surfaces,
1008 };
1009
1010 static void
1011 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1012 {
1013 struct gl_context *ctx = &brw->ctx;
1014 /* _NEW_PROGRAM */
1015 struct gl_shader_program *prog =
1016 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1017
1018 if (!prog)
1019 return;
1020
1021 /* BRW_NEW_CS_PROG_DATA */
1022 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1023 &brw->cs.base, &brw->cs.prog_data->base);
1024 }
1025
1026 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1027 .dirty = {
1028 .mesa = _NEW_PROGRAM,
1029 .brw = BRW_NEW_BATCH |
1030 BRW_NEW_CS_PROG_DATA |
1031 BRW_NEW_UNIFORM_BUFFER,
1032 },
1033 .emit = brw_upload_cs_ubo_surfaces,
1034 };
1035
1036 void
1037 brw_upload_abo_surfaces(struct brw_context *brw,
1038 struct gl_shader *shader,
1039 struct brw_stage_state *stage_state,
1040 struct brw_stage_prog_data *prog_data)
1041 {
1042 struct gl_context *ctx = &brw->ctx;
1043 uint32_t *surf_offsets =
1044 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1045
1046 if (shader && shader->NumAtomicBuffers) {
1047 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1048 struct gl_atomic_buffer_binding *binding =
1049 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1050 struct intel_buffer_object *intel_bo =
1051 intel_buffer_object(binding->BufferObject);
1052 drm_intel_bo *bo = intel_bufferobj_buffer(
1053 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1054
1055 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1056 binding->Offset, BRW_SURFACEFORMAT_RAW,
1057 bo->size - binding->Offset, 1, true);
1058 }
1059
1060 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1061 }
1062 }
1063
1064 static void
1065 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1066 {
1067 struct gl_context *ctx = &brw->ctx;
1068 /* _NEW_PROGRAM */
1069 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1070
1071 if (prog) {
1072 /* BRW_NEW_FS_PROG_DATA */
1073 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1074 &brw->wm.base, &brw->wm.prog_data->base);
1075 }
1076 }
1077
1078 const struct brw_tracked_state brw_wm_abo_surfaces = {
1079 .dirty = {
1080 .mesa = _NEW_PROGRAM,
1081 .brw = BRW_NEW_ATOMIC_BUFFER |
1082 BRW_NEW_BATCH |
1083 BRW_NEW_FS_PROG_DATA,
1084 },
1085 .emit = brw_upload_wm_abo_surfaces,
1086 };
1087
1088 static void
1089 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1090 {
1091 struct gl_context *ctx = &brw->ctx;
1092 /* _NEW_PROGRAM */
1093 struct gl_shader_program *prog =
1094 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1095
1096 if (prog) {
1097 /* BRW_NEW_CS_PROG_DATA */
1098 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1099 &brw->cs.base, &brw->cs.prog_data->base);
1100 }
1101 }
1102
1103 const struct brw_tracked_state brw_cs_abo_surfaces = {
1104 .dirty = {
1105 .mesa = _NEW_PROGRAM,
1106 .brw = BRW_NEW_ATOMIC_BUFFER |
1107 BRW_NEW_BATCH |
1108 BRW_NEW_CS_PROG_DATA,
1109 },
1110 .emit = brw_upload_cs_abo_surfaces,
1111 };
1112
1113 static void
1114 brw_upload_cs_image_surfaces(struct brw_context *brw)
1115 {
1116 struct gl_context *ctx = &brw->ctx;
1117 /* _NEW_PROGRAM */
1118 struct gl_shader_program *prog =
1119 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1120
1121 if (prog) {
1122 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1123 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1124 &brw->cs.base, &brw->cs.prog_data->base);
1125 }
1126 }
1127
1128 const struct brw_tracked_state brw_cs_image_surfaces = {
1129 .dirty = {
1130 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1131 .brw = BRW_NEW_BATCH |
1132 BRW_NEW_CS_PROG_DATA |
1133 BRW_NEW_IMAGE_UNITS
1134 },
1135 .emit = brw_upload_cs_image_surfaces,
1136 };
1137
1138 static uint32_t
1139 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1140 {
1141 if (access == GL_WRITE_ONLY) {
1142 return brw_format_for_mesa_format(format);
1143 } else {
1144 /* Typed surface reads support a very limited subset of the shader
1145 * image formats. Translate it into the closest format the
1146 * hardware supports.
1147 */
1148 if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
1149 (_mesa_get_format_bytes(format) >= 8 &&
1150 (brw->gen == 7 && !brw->is_haswell)))
1151 return BRW_SURFACEFORMAT_RAW;
1152 else
1153 return brw_format_for_mesa_format(
1154 brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
1155 }
1156 }
1157
1158 static void
1159 update_default_image_param(struct brw_context *brw,
1160 struct gl_image_unit *u,
1161 unsigned surface_idx,
1162 struct brw_image_param *param)
1163 {
1164 memset(param, 0, sizeof(*param));
1165 param->surface_idx = surface_idx;
1166 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1167 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1168 * detailed explanation of these parameters.
1169 */
1170 param->swizzling[0] = 0xff;
1171 param->swizzling[1] = 0xff;
1172 }
1173
1174 static void
1175 update_buffer_image_param(struct brw_context *brw,
1176 struct gl_image_unit *u,
1177 unsigned surface_idx,
1178 struct brw_image_param *param)
1179 {
1180 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1181
1182 update_default_image_param(brw, u, surface_idx, param);
1183
1184 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1185 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1186 }
1187
1188 static void
1189 update_texture_image_param(struct brw_context *brw,
1190 struct gl_image_unit *u,
1191 unsigned surface_idx,
1192 struct brw_image_param *param)
1193 {
1194 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1195
1196 update_default_image_param(brw, u, surface_idx, param);
1197
1198 param->size[0] = minify(mt->logical_width0, u->Level);
1199 param->size[1] = minify(mt->logical_height0, u->Level);
1200 param->size[2] = (!u->Layered ? 1 :
1201 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1202 u->TexObj->Target == GL_TEXTURE_3D ?
1203 minify(mt->logical_depth0, u->Level) :
1204 mt->logical_depth0);
1205
1206 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1207 &param->offset[0],
1208 &param->offset[1]);
1209
1210 param->stride[0] = mt->cpp;
1211 param->stride[1] = mt->pitch / mt->cpp;
1212 param->stride[2] =
1213 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1214 param->stride[3] =
1215 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1216
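      /* Illustrative example of the shifts computed below, assuming a 4-byte
       * format (mt->cpp = 4): X-tiling gives tiling[0] = log2(512 / 4) = 7 and
       * tiling[1] = log2(8) = 3, while Y-tiling gives tiling[0] = log2(16 / 4) = 2
       * and tiling[1] = log2(32) = 5.
       */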
1217 if (mt->tiling == I915_TILING_X) {
1218 /* An X tile is a rectangular block of 512x8 bytes. */
1219 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1220 param->tiling[1] = _mesa_logbase2(8);
1221
1222 if (brw->has_swizzling) {
1223 /* Right shifts required to swizzle bits 9 and 10 of the memory
1224 * address with bit 6.
1225 */
1226 param->swizzling[0] = 3;
1227 param->swizzling[1] = 4;
1228 }
1229 } else if (mt->tiling == I915_TILING_Y) {
1230       /* The layout of a Y-tiled surface in memory isn't fundamentally
1231        * different from the layout of an X-tiled surface: we simply pretend
1232        * that the surface is broken up into a number of smaller 16Bx32 tiles,
1233        * each one arranged in X-major order just as in the case of X-tiling.
1234 */
1235 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1236 param->tiling[1] = _mesa_logbase2(32);
1237
1238 if (brw->has_swizzling) {
1239 /* Right shift required to swizzle bit 9 of the memory address with
1240 * bit 6.
1241 */
1242 param->swizzling[0] = 3;
1243 }
1244 }
1245
1246 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1247 * address calculation algorithm (emit_address_calculation() in
1248 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1249 * modulus equal to the LOD.
1250 */
1251 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1252 0);
1253 }
1254
1255 static void
1256 update_image_surface(struct brw_context *brw,
1257 struct gl_image_unit *u,
1258 GLenum access,
1259 unsigned surface_idx,
1260 uint32_t *surf_offset,
1261 struct brw_image_param *param)
1262 {
1263 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1264 struct gl_texture_object *obj = u->TexObj;
1265 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1266
1267 if (obj->Target == GL_TEXTURE_BUFFER) {
1268 struct intel_buffer_object *intel_obj =
1269 intel_buffer_object(obj->BufferObject);
1270 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1271 _mesa_get_format_bytes(u->_ActualFormat));
1272
1273 brw->vtbl.emit_buffer_surface_state(
1274 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1275 format, intel_obj->Base.Size / texel_size, texel_size,
1276 access != GL_READ_ONLY);
1277
1278 update_buffer_image_param(brw, u, surface_idx, param);
1279
1280 } else {
1281 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1282 struct intel_mipmap_tree *mt = intel_obj->mt;
1283
1284 if (format == BRW_SURFACEFORMAT_RAW) {
1285 brw->vtbl.emit_buffer_surface_state(
1286 brw, surf_offset, mt->bo, mt->offset,
1287 format, mt->bo->size - mt->offset, 1 /* pitch */,
1288 access != GL_READ_ONLY);
1289
1290 } else {
1291 const unsigned min_layer = obj->MinLayer + u->_Layer;
1292 const unsigned min_level = obj->MinLevel + u->Level;
1293 const unsigned num_layers = (!u->Layered ? 1 :
1294 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1295 mt->logical_depth0);
1296 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1297 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1298 GL_TEXTURE_2D_ARRAY : obj->Target);
1299
1300 brw->vtbl.emit_texture_surface_state(
1301 brw, mt, target,
1302 min_layer, min_layer + num_layers,
1303 min_level, min_level + 1,
1304 format, SWIZZLE_XYZW,
1305 surf_offset, access != GL_READ_ONLY, false);
1306 }
1307
1308 update_texture_image_param(brw, u, surface_idx, param);
1309 }
1310
1311 } else {
1312 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1313 update_default_image_param(brw, u, surface_idx, param);
1314 }
1315 }
1316
1317 void
1318 brw_upload_image_surfaces(struct brw_context *brw,
1319 struct gl_shader *shader,
1320 struct brw_stage_state *stage_state,
1321 struct brw_stage_prog_data *prog_data)
1322 {
1323 struct gl_context *ctx = &brw->ctx;
1324
1325 if (shader && shader->NumImages) {
1326 for (unsigned i = 0; i < shader->NumImages; i++) {
1327 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1328 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1329
1330 update_image_surface(brw, u, shader->ImageAccess[i],
1331 surf_idx,
1332 &stage_state->surf_offset[surf_idx],
1333 &prog_data->image_param[i]);
1334 }
1335
1336 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1337 }
1338 }
1339
1340 static void
1341 brw_upload_wm_image_surfaces(struct brw_context *brw)
1342 {
1343 struct gl_context *ctx = &brw->ctx;
1344 /* BRW_NEW_FRAGMENT_PROGRAM */
1345 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1346
1347 if (prog) {
1348 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1349 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1350 &brw->wm.base, &brw->wm.prog_data->base);
1351 }
1352 }
1353
1354 const struct brw_tracked_state brw_wm_image_surfaces = {
1355 .dirty = {
1356 .mesa = _NEW_TEXTURE,
1357 .brw = BRW_NEW_BATCH |
1358 BRW_NEW_FRAGMENT_PROGRAM |
1359 BRW_NEW_FS_PROG_DATA |
1360 BRW_NEW_IMAGE_UNITS
1361 },
1362 .emit = brw_upload_wm_image_surfaces,
1363 };
1364
1365 void
1366 gen4_init_vtable_surface_functions(struct brw_context *brw)
1367 {
1368 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1369 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1370 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1371 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1372 }
1373
1374 static void
1375 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1376 {
1377 struct gl_context *ctx = &brw->ctx;
1378 /* _NEW_PROGRAM */
1379 struct gl_shader_program *prog =
1380 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1381
1382 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1383 const unsigned surf_idx =
1384 brw->cs.prog_data->binding_table.work_groups_start;
1385 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1386 drm_intel_bo *bo;
1387 uint32_t bo_offset;
1388
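      /* This surface backs the gl_NumWorkGroups built-in as a 12-byte RAW
       * buffer. If no BO already holds the counts, upload the three GLuints
       * from brw->compute.num_work_groups; otherwise (presumably the
       * indirect-dispatch path, where the counts already live in a GPU buffer)
       * reuse that BO and offset directly.
       */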
1389 if (brw->compute.num_work_groups_bo == NULL) {
1390 bo = NULL;
1391 intel_upload_data(brw,
1392 (void *)brw->compute.num_work_groups,
1393 3 * sizeof(GLuint),
1394 sizeof(GLuint),
1395 &bo,
1396 &bo_offset);
1397 } else {
1398 bo = brw->compute.num_work_groups_bo;
1399 bo_offset = brw->compute.num_work_groups_offset;
1400 }
1401
1402 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1403 bo, bo_offset,
1404 BRW_SURFACEFORMAT_RAW,
1405 3 * sizeof(GLuint), 1, true);
1406 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1407 }
1408 }
1409
1410 const struct brw_tracked_state brw_cs_work_groups_surface = {
1411 .dirty = {
1412 .brw = BRW_NEW_CS_WORK_GROUPS
1413 },
1414 .emit = brw_upload_cs_work_groups_surface,
1415 };