src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38 #include "main/framebuffer.h"
39
40 #include "intel_mipmap_tree.h"
41 #include "intel_batchbuffer.h"
42 #include "intel_tex.h"
43 #include "intel_fbo.h"
44 #include "intel_buffer_objects.h"
45
46 #include "brw_context.h"
47 #include "brw_state.h"
48 #include "brw_defines.h"
49 #include "brw_wm.h"
50
51 GLuint
52 translate_tex_target(GLenum target)
53 {
54 switch (target) {
55 case GL_TEXTURE_1D:
56 case GL_TEXTURE_1D_ARRAY_EXT:
57 return BRW_SURFACE_1D;
58
59 case GL_TEXTURE_RECTANGLE_NV:
60 return BRW_SURFACE_2D;
61
62 case GL_TEXTURE_2D:
63 case GL_TEXTURE_2D_ARRAY_EXT:
64 case GL_TEXTURE_EXTERNAL_OES:
65 case GL_TEXTURE_2D_MULTISAMPLE:
66 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
67 return BRW_SURFACE_2D;
68
69 case GL_TEXTURE_3D:
70 return BRW_SURFACE_3D;
71
72 case GL_TEXTURE_CUBE_MAP:
73 case GL_TEXTURE_CUBE_MAP_ARRAY:
74 return BRW_SURFACE_CUBE;
75
76 default:
77 unreachable("not reached");
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
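/**
 * Translate the sample count into the SURFACE_STATE multisample count field.
 * The gen4-6 surface state programmed in this file only expresses 1x or 4x,
 * and gen6 (the first of these parts with MSAA) supports only 4x, so any
 * multisampled surface is reported as MULTISAMPLECOUNT_4.
 */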
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104 void
105 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
106 bool is_render_target,
107 unsigned *width, unsigned *height,
108 unsigned *pitch, uint32_t *tiling, unsigned *format)
109 {
110 static const unsigned halign_stencil = 8;
111
112 /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
113 * there are half as many rows.
114 * In addition, mip-levels are accessed manually by the program and
115 * therefore the surface is set up to cover all the mip-levels for one
116 * slice. (Hardware is still used to access individual slices.)
117 */
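   /* Illustrative example (hypothetical sizes): a single-slice stencil
    * miptree with pitch 64, total_width 64 and total_height 64 comes out
    * below as a Y-tiled surface with pitch 128 and dimensions 128x32.
    */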
118 *tiling = I915_TILING_Y;
119 *pitch = mt->pitch * 2;
120 *width = ALIGN(mt->total_width, halign_stencil) * 2;
121 *height = (mt->total_height / mt->physical_depth0) / 2;
122
123 if (is_render_target) {
124 *format = BRW_SURFACEFORMAT_R8_UINT;
125 }
126 }
127
128
129 /**
130 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
131 * swizzling.
132 */
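/* For example, a GL_DEPTH_COMPONENT texture with DepthMode GL_LUMINANCE and
 * an identity texture swizzle resolves to (X, X, X, ONE) below.
 */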
133 int
134 brw_get_texture_swizzle(const struct gl_context *ctx,
135 const struct gl_texture_object *t)
136 {
137 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
138
139 int swizzles[SWIZZLE_NIL + 1] = {
140 SWIZZLE_X,
141 SWIZZLE_Y,
142 SWIZZLE_Z,
143 SWIZZLE_W,
144 SWIZZLE_ZERO,
145 SWIZZLE_ONE,
146 SWIZZLE_NIL
147 };
148
149 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
150 img->_BaseFormat == GL_DEPTH_STENCIL) {
151 GLenum depth_mode = t->DepthMode;
152
153 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
154 * with depth component data specified with a sized internal format.
155 * Otherwise, it's left at the old default, GL_LUMINANCE.
156 */
157 if (_mesa_is_gles3(ctx) &&
158 img->InternalFormat != GL_DEPTH_COMPONENT &&
159 img->InternalFormat != GL_DEPTH_STENCIL) {
160 depth_mode = GL_RED;
161 }
162
163 switch (depth_mode) {
164 case GL_ALPHA:
165 swizzles[0] = SWIZZLE_ZERO;
166 swizzles[1] = SWIZZLE_ZERO;
167 swizzles[2] = SWIZZLE_ZERO;
168 swizzles[3] = SWIZZLE_X;
169 break;
170 case GL_LUMINANCE:
171 swizzles[0] = SWIZZLE_X;
172 swizzles[1] = SWIZZLE_X;
173 swizzles[2] = SWIZZLE_X;
174 swizzles[3] = SWIZZLE_ONE;
175 break;
176 case GL_INTENSITY:
177 swizzles[0] = SWIZZLE_X;
178 swizzles[1] = SWIZZLE_X;
179 swizzles[2] = SWIZZLE_X;
180 swizzles[3] = SWIZZLE_X;
181 break;
182 case GL_RED:
183 swizzles[0] = SWIZZLE_X;
184 swizzles[1] = SWIZZLE_ZERO;
185 swizzles[2] = SWIZZLE_ZERO;
186 swizzles[3] = SWIZZLE_ONE;
187 break;
188 }
189 }
190
191 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
192
193 /* If the texture's format is alpha-only, force R, G, and B to
194 * 0.0. Similarly, if the texture's format has no alpha channel,
195 * force the alpha value read to 1.0. This allows the
196 * implementation to use an RGBA texture for any of these formats
197 * without leaking any unexpected values.
198 */
199 switch (img->_BaseFormat) {
200 case GL_ALPHA:
201 swizzles[0] = SWIZZLE_ZERO;
202 swizzles[1] = SWIZZLE_ZERO;
203 swizzles[2] = SWIZZLE_ZERO;
204 break;
205 case GL_LUMINANCE:
206 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
207 swizzles[0] = SWIZZLE_X;
208 swizzles[1] = SWIZZLE_X;
209 swizzles[2] = SWIZZLE_X;
210 swizzles[3] = SWIZZLE_ONE;
211 }
212 break;
213 case GL_LUMINANCE_ALPHA:
214 if (datatype == GL_SIGNED_NORMALIZED) {
215 swizzles[0] = SWIZZLE_X;
216 swizzles[1] = SWIZZLE_X;
217 swizzles[2] = SWIZZLE_X;
218 swizzles[3] = SWIZZLE_W;
219 }
220 break;
221 case GL_INTENSITY:
222 if (datatype == GL_SIGNED_NORMALIZED) {
223 swizzles[0] = SWIZZLE_X;
224 swizzles[1] = SWIZZLE_X;
225 swizzles[2] = SWIZZLE_X;
226 swizzles[3] = SWIZZLE_X;
227 }
228 break;
229 case GL_RED:
230 case GL_RG:
231 case GL_RGB:
232 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
233 swizzles[3] = SWIZZLE_ONE;
234 break;
235 }
236
237 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
238 swizzles[GET_SWZ(t->_Swizzle, 1)],
239 swizzles[GET_SWZ(t->_Swizzle, 2)],
240 swizzles[GET_SWZ(t->_Swizzle, 3)]);
241 }
242
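/**
 * Emit a gen4-6 style SURFACE_STATE for a buffer surface (SURFTYPE_BUFFER).
 * The element count is packed across the Width (bits 6:0), Height
 * (bits 19:7) and Depth (bits 26:20) fields, and "pitch" is the size in
 * bytes of one element.
 */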
243 static void
244 gen4_emit_buffer_surface_state(struct brw_context *brw,
245 uint32_t *out_offset,
246 drm_intel_bo *bo,
247 unsigned buffer_offset,
248 unsigned surface_format,
249 unsigned buffer_size,
250 unsigned pitch,
251 bool rw)
252 {
253 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
254 6 * 4, 32, out_offset);
255 memset(surf, 0, 6 * 4);
256
257 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
258 surface_format << BRW_SURFACE_FORMAT_SHIFT |
259 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
260 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
261 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
262 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
263 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
264 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
265
266 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
267 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
268 * physical cache. It is mapped in hardware to the sampler cache."
269 */
270 if (bo) {
271 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
272 bo, buffer_offset,
273 I915_GEM_DOMAIN_SAMPLER,
274 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
275 }
276 }
277
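/**
 * Set up the SURFACE_STATE for a GL_TEXTURE_BUFFER texture: bind the
 * underlying buffer object (if any) with the buffer's texel format, sized
 * in whole texels.
 */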
278 void
279 brw_update_buffer_texture_surface(struct gl_context *ctx,
280 unsigned unit,
281 uint32_t *surf_offset)
282 {
283 struct brw_context *brw = brw_context(ctx);
284 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
285 struct intel_buffer_object *intel_obj =
286 intel_buffer_object(tObj->BufferObject);
287 uint32_t size = tObj->BufferSize;
288 drm_intel_bo *bo = NULL;
289 mesa_format format = tObj->_BufferObjectFormat;
290 uint32_t brw_format = brw_format_for_mesa_format(format);
291 int texel_size = _mesa_get_format_bytes(format);
292
293 if (intel_obj) {
294 size = MIN2(size, intel_obj->Base.Size);
295 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
296 }
297
298 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
299 _mesa_problem(NULL, "bad format %s for texture buffer\n",
300 _mesa_get_format_name(format));
301 }
302
303 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
304 tObj->BufferOffset,
305 brw_format,
306 size / texel_size,
307 texel_size,
308 false /* rw */);
309 }
310
311 static void
312 brw_update_texture_surface(struct gl_context *ctx,
313 unsigned unit,
314 uint32_t *surf_offset,
315 bool for_gather)
316 {
317 struct brw_context *brw = brw_context(ctx);
318 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
319 struct intel_texture_object *intelObj = intel_texture_object(tObj);
320 struct intel_mipmap_tree *mt = intelObj->mt;
321 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
322 uint32_t *surf;
323
324 /* BRW_NEW_TEXTURE_BUFFER */
325 if (tObj->Target == GL_TEXTURE_BUFFER) {
326 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
327 return;
328 }
329
330 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
331 6 * 4, 32, surf_offset);
332
333 uint32_t tex_format = translate_tex_format(brw, mt->format,
334 sampler->sRGBDecode);
335
336 if (for_gather) {
337 /* Sandybridge's gather4 message is broken for integer formats.
338 * To work around this, we pretend the surface is UNORM for
339 * 8- or 16-bit formats, and emit shader instructions to recover
340 * the real INT/UINT value. For 32-bit formats, we pretend
341 * the surface is FLOAT, and simply reinterpret the resulting
342 * bits.
343 */
344 switch (tex_format) {
345 case BRW_SURFACEFORMAT_R8_SINT:
346 case BRW_SURFACEFORMAT_R8_UINT:
347 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
348 break;
349
350 case BRW_SURFACEFORMAT_R16_SINT:
351 case BRW_SURFACEFORMAT_R16_UINT:
352 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
353 break;
354
355 case BRW_SURFACEFORMAT_R32_SINT:
356 case BRW_SURFACEFORMAT_R32_UINT:
357 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
358 break;
359
360 default:
361 break;
362 }
363 }
364
365 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
366 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
367 BRW_SURFACE_CUBEFACE_ENABLES |
368 tex_format << BRW_SURFACE_FORMAT_SHIFT);
369
370 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
371
372 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
373 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
374 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
375
376 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
377 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
378 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
379
380 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
381 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
382
383 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
384
385 /* Emit relocation to surface contents */
386 drm_intel_bo_emit_reloc(brw->batch.bo,
387 *surf_offset + 4,
388 mt->bo,
389 surf[1] - mt->bo->offset64,
390 I915_GEM_DOMAIN_SAMPLER, 0);
391 }
392
393 /**
394 * Create the constant buffer surface. Vertex/fragment shader constants will be
395 * read from this buffer with Data Port Read instructions/messages.
396 */
397 void
398 brw_create_constant_surface(struct brw_context *brw,
399 drm_intel_bo *bo,
400 uint32_t offset,
401 uint32_t size,
402 uint32_t *out_offset,
403 bool dword_pitch)
404 {
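   /* Constants are read either as tightly packed dwords (stride 4) or as
    * vec4 slots (stride 16), depending on what layout the consuming shader
    * backend expects; "elements" below is the buffer size in units of that
    * stride.
    */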
405 uint32_t stride = dword_pitch ? 4 : 16;
406 uint32_t elements = ALIGN(size, stride) / stride;
407
408 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
409 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
410 elements, stride, false);
411 }
412
413 /**
414 * Set up a binding table entry for use by stream output logic (transform
415 * feedback).
416 *
417 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
418 */
419 void
420 brw_update_sol_surface(struct brw_context *brw,
421 struct gl_buffer_object *buffer_obj,
422 uint32_t *out_offset, unsigned num_vector_components,
423 unsigned stride_dwords, unsigned offset_dwords)
424 {
425 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
426 uint32_t offset_bytes = 4 * offset_dwords;
427 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
428 offset_bytes,
429 buffer_obj->Size - offset_bytes);
430 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
431 out_offset);
432 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
433 size_t size_dwords = buffer_obj->Size / 4;
434 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
435
436 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
437 * too big to map using a single binding table entry?
438 */
439 assert((size_dwords - offset_dwords) / stride_dwords
440 <= BRW_MAX_NUM_BUFFER_ENTRIES);
441
442 if (size_dwords > offset_dwords + num_vector_components) {
443 /* There is room for at least 1 transform feedback output in the buffer.
444 * Compute the number of additional transform feedback outputs the
445 * buffer has room for.
446 */
447 buffer_size_minus_1 =
448 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
449 } else {
450 /* There isn't even room for a single transform feedback output in the
451 * buffer. We can't configure the binding table entry to prevent output
452 * entirely; we'll have to rely on the geometry shader to detect
453 * overflow. But to minimize the damage in case of a bug, set up the
454 * binding table entry to just allow a single output.
455 */
456 buffer_size_minus_1 = 0;
457 }
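   /* The entry count (minus one) is split across the surface Width, Height
    * and Depth fields: bits 6:0, 19:7 and 26:20 respectively.  For example
    * (illustrative value), buffer_size_minus_1 = 0x12345 gives width 0x45,
    * height 0x246 and depth 0.
    */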
458 width = buffer_size_minus_1 & 0x7f;
459 height = (buffer_size_minus_1 & 0xfff80) >> 7;
460 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
461
462 switch (num_vector_components) {
463 case 1:
464 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
465 break;
466 case 2:
467 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
468 break;
469 case 3:
470 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
471 break;
472 case 4:
473 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
474 break;
475 default:
476 unreachable("Invalid vector size for transform feedback output");
477 }
478
479 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
480 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
481 surface_format << BRW_SURFACE_FORMAT_SHIFT |
482 BRW_SURFACE_RC_READ_WRITE;
483 surf[1] = bo->offset64 + offset_bytes; /* reloc */
484 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
485 height << BRW_SURFACE_HEIGHT_SHIFT);
486 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
487 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
488 surf[4] = 0;
489 surf[5] = 0;
490
491 /* Emit relocation to surface contents. */
492 drm_intel_bo_emit_reloc(brw->batch.bo,
493 *out_offset + 4,
494 bo, offset_bytes,
495 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
496 }
497
498 /* Creates a new WM constant buffer reflecting the current fragment program's
499 * constants, if needed by the fragment program.
500 *
501 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
502 * state atom.
503 */
504 static void
505 brw_upload_wm_pull_constants(struct brw_context *brw)
506 {
507 struct brw_stage_state *stage_state = &brw->wm.base;
508 /* BRW_NEW_FRAGMENT_PROGRAM */
509 struct brw_fragment_program *fp =
510 (struct brw_fragment_program *) brw->fragment_program;
511 /* BRW_NEW_FS_PROG_DATA */
512 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
513
514 /* _NEW_PROGRAM_CONSTANTS */
515 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
516 stage_state, prog_data, true);
517 }
518
519 const struct brw_tracked_state brw_wm_pull_constants = {
520 .dirty = {
521 .mesa = _NEW_PROGRAM_CONSTANTS,
522 .brw = BRW_NEW_BATCH |
523 BRW_NEW_FRAGMENT_PROGRAM |
524 BRW_NEW_FS_PROG_DATA,
525 },
526 .emit = brw_upload_wm_pull_constants,
527 };
528
529 /**
530 * Creates a null renderbuffer surface.
531 *
532 * This is used when the shader doesn't write to any color output. An FB
533 * write to target 0 will still be emitted, because that's how the thread is
534 * terminated (and computed depth is returned), so we need to have the
535 * hardware discard the target 0 color output.
536 */
537 static void
538 brw_emit_null_surface_state(struct brw_context *brw,
539 unsigned width,
540 unsigned height,
541 unsigned samples,
542 uint32_t *out_offset)
543 {
544 /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
545 * Notes):
546 *
547 * A null surface will be used in instances where an actual surface is
548 * not bound. When a write message is generated to a null surface, no
549 * actual surface is written to. When a read message (including any
550 * sampling engine message) is generated to a null surface, the result
551 * is all zeros. Note that a null surface type is allowed to be used
552 * with all messages, even if it is not specifically indicated as
553 * supported. All of the remaining fields in surface state are ignored
554 * for null surfaces, with the following exceptions:
555 *
556 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
557 * depth buffer’s corresponding state for all render target surfaces,
558 * including null.
559 *
560 * - Surface Format must be R8G8B8A8_UNORM.
561 */
562 unsigned surface_type = BRW_SURFACE_NULL;
563 drm_intel_bo *bo = NULL;
564 unsigned pitch_minus_1 = 0;
565 uint32_t multisampling_state = 0;
566 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
567 out_offset);
568
569 if (samples > 1) {
570 /* On Gen6, null render targets seem to cause GPU hangs when
571 * multisampling. So work around this problem by rendering into a dummy
572 * color buffer.
573 *
574 * To decrease the amount of memory needed by the workaround buffer, we
575 * set its pitch to 128 bytes (the width of a Y tile). This means that
576 * the amount of memory needed for the workaround buffer is
577 * (width_in_tiles + height_in_tiles - 1) tiles.
578 *
579 * Note that since the workaround buffer will be interpreted by the
580 * hardware as an interleaved multisampled buffer, we need to compute
581 * width_in_tiles and height_in_tiles by dividing the width and height
582 * by 16 rather than the normal Y-tile size of 32.
583 */
584 unsigned width_in_tiles = ALIGN(width, 16) / 16;
585 unsigned height_in_tiles = ALIGN(height, 16) / 16;
586 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
587 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
588 size_needed);
589 bo = brw->wm.multisampled_null_render_target_bo;
590 surface_type = BRW_SURFACE_2D;
591 pitch_minus_1 = 127;
592 multisampling_state = brw_get_surface_num_multisamples(samples);
593 }
594
595 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
596 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
597 if (brw->gen < 6) {
598 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
599 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
600 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
601 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
602 }
603 surf[1] = bo ? bo->offset64 : 0;
604 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
605 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
606
607 /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
608 * Notes):
609 *
610 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
611 */
612 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
613 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
614 surf[4] = multisampling_state;
615 surf[5] = 0;
616
617 if (bo) {
618 drm_intel_bo_emit_reloc(brw->batch.bo,
619 *out_offset + 4,
620 bo, 0,
621 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
622 }
623 }
624
625 /**
626 * Sets up a surface state structure to point at the given region.
627 * While it is only used for the front/back buffer currently, it should be
628 * usable for further buffers when doing ARB_draw_buffers support.
629 */
630 static uint32_t
631 brw_update_renderbuffer_surface(struct brw_context *brw,
632 struct gl_renderbuffer *rb,
633 bool layered, unsigned unit,
634 uint32_t surf_index)
635 {
636 struct gl_context *ctx = &brw->ctx;
637 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
638 struct intel_mipmap_tree *mt = irb->mt;
639 uint32_t *surf;
640 uint32_t tile_x, tile_y;
641 uint32_t format = 0;
642 uint32_t offset;
643 /* _NEW_BUFFERS */
644 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
645 /* BRW_NEW_FS_PROG_DATA */
646
647 assert(!layered);
648
649 if (rb->TexImage && !brw->has_surface_tile_offset) {
650 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
651
652 if (tile_x != 0 || tile_y != 0) {
653 /* Original gen4 hardware couldn't draw to a non-tile-aligned
654 * destination in a miptree unless you actually set up your renderbuffer
655 * as a miptree and used the fragile lod/array_index/etc. controls to
656 * select the image. So, instead, we just make a new single-level
657 * miptree and render into that.
658 */
659 intel_renderbuffer_move_to_temp(brw, irb, false);
660 mt = irb->mt;
661 }
662 }
663
664 intel_miptree_used_for_rendering(irb->mt);
665
666 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
667
668 format = brw->render_target_format[rb_format];
669 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
670 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
671 __func__, _mesa_get_format_name(rb_format));
672 }
673
674 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
675 format << BRW_SURFACE_FORMAT_SHIFT);
676
677 /* reloc */
678 assert(mt->offset % mt->cpp == 0);
679 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
680 mt->bo->offset64 + mt->offset);
681
682 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
683 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
684
685 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
686 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
687
688 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
689
690 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
691 /* The low bits of these fields can't be represented (4-pixel X and
692 * 2-row Y granularity), which is what the asserts below guard against.
693 */
694 assert(tile_x % 4 == 0);
695 assert(tile_y % 2 == 0);
696 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
697 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
698 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
699
700 if (brw->gen < 6) {
701 /* _NEW_COLOR */
702 if (!ctx->Color.ColorLogicOpEnabled &&
703 (ctx->Color.BlendEnabled & (1 << unit)))
704 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
705
706 if (!ctx->Color.ColorMask[unit][0])
707 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
708 if (!ctx->Color.ColorMask[unit][1])
709 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
710 if (!ctx->Color.ColorMask[unit][2])
711 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
712
713 /* Disable writes to the alpha component when the renderbuffer is
714 * XRGB (no alpha bits) or alpha writes are masked off.
715 */
716 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
717 !ctx->Color.ColorMask[unit][3]) {
718 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
719 }
720 }
721
722 drm_intel_bo_emit_reloc(brw->batch.bo,
723 offset + 4,
724 mt->bo,
725 surf[1] - mt->bo->offset64,
726 I915_GEM_DOMAIN_RENDER,
727 I915_GEM_DOMAIN_RENDER);
728
729 return offset;
730 }
731
732 /**
733 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
734 */
735 void
736 brw_update_renderbuffer_surfaces(struct brw_context *brw,
737 const struct gl_framebuffer *fb,
738 uint32_t render_target_start,
739 uint32_t *surf_offset)
740 {
741 GLuint i;
742 const unsigned int w = _mesa_geometric_width(fb);
743 const unsigned int h = _mesa_geometric_height(fb);
744 const unsigned int s = _mesa_geometric_samples(fb);
745
746 /* Update surfaces for drawing buffers */
747 if (fb->_NumColorDrawBuffers >= 1) {
748 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
749 const uint32_t surf_index = render_target_start + i;
750
751 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
752 surf_offset[surf_index] =
753 brw->vtbl.update_renderbuffer_surface(
754 brw, fb->_ColorDrawBuffers[i],
755 _mesa_geometric_layers(fb) > 0, i, surf_index);
756 } else {
757 brw->vtbl.emit_null_surface_state(brw, w, h, s,
758 &surf_offset[surf_index]);
759 }
760 }
761 } else {
762 const uint32_t surf_index = render_target_start;
763 brw->vtbl.emit_null_surface_state(brw, w, h, s,
764 &surf_offset[surf_index]);
765 }
766 }
767
768 static void
769 update_renderbuffer_surfaces(struct brw_context *brw)
770 {
771 const struct gl_context *ctx = &brw->ctx;
772
773 /* _NEW_BUFFERS | _NEW_COLOR */
774 const struct gl_framebuffer *fb = ctx->DrawBuffer;
775 brw_update_renderbuffer_surfaces(
776 brw, fb,
777 brw->wm.prog_data->binding_table.render_target_start,
778 brw->wm.base.surf_offset);
779 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
780 }
781
782 const struct brw_tracked_state brw_renderbuffer_surfaces = {
783 .dirty = {
784 .mesa = _NEW_BUFFERS |
785 _NEW_COLOR,
786 .brw = BRW_NEW_BATCH |
787 BRW_NEW_FS_PROG_DATA,
788 },
789 .emit = update_renderbuffer_surfaces,
790 };
791
792 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
793 .dirty = {
794 .mesa = _NEW_BUFFERS,
795 .brw = BRW_NEW_BATCH,
796 },
797 .emit = update_renderbuffer_surfaces,
798 };
799
800
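/**
 * Fill in binding table entries for every sampler used by the given
 * program stage.  When for_gather is set, the entries land in the separate
 * gather_texture_start range so the gather4 workaround formats can be used
 * without affecting ordinary sampling.
 */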
801 static void
802 update_stage_texture_surfaces(struct brw_context *brw,
803 const struct gl_program *prog,
804 struct brw_stage_state *stage_state,
805 bool for_gather)
806 {
807 if (!prog)
808 return;
809
810 struct gl_context *ctx = &brw->ctx;
811
812 uint32_t *surf_offset = stage_state->surf_offset;
813
814 /* BRW_NEW_*_PROG_DATA */
815 if (for_gather)
816 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
817 else
818 surf_offset += stage_state->prog_data->binding_table.texture_start;
819
820 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
821 for (unsigned s = 0; s < num_samplers; s++) {
822 surf_offset[s] = 0;
823
824 if (prog->SamplersUsed & (1 << s)) {
825 const unsigned unit = prog->SamplerUnits[s];
826
827 /* _NEW_TEXTURE */
828 if (ctx->Texture.Unit[unit]._Current) {
829 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
830 }
831 }
832 }
833 }
834
835
836 /**
837 * Construct SURFACE_STATE objects for enabled textures.
838 */
839 static void
840 brw_update_texture_surfaces(struct brw_context *brw)
841 {
842 /* BRW_NEW_VERTEX_PROGRAM */
843 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
844
845 /* BRW_NEW_GEOMETRY_PROGRAM */
846 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
847
848 /* BRW_NEW_FRAGMENT_PROGRAM */
849 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
850
851 /* _NEW_TEXTURE */
852 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
853 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
854 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
855
856 /* Emit an alternate set of surface states for gather. This
857 * allows the surface format to be overridden for only the
858 * gather4 messages. */
859 if (brw->gen < 8) {
860 if (vs && vs->UsesGather)
861 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
862 if (gs && gs->UsesGather)
863 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
864 if (fs && fs->UsesGather)
865 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
866 }
867
868 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
869 }
870
871 const struct brw_tracked_state brw_texture_surfaces = {
872 .dirty = {
873 .mesa = _NEW_TEXTURE,
874 .brw = BRW_NEW_BATCH |
875 BRW_NEW_FRAGMENT_PROGRAM |
876 BRW_NEW_FS_PROG_DATA |
877 BRW_NEW_GEOMETRY_PROGRAM |
878 BRW_NEW_GS_PROG_DATA |
879 BRW_NEW_TEXTURE_BUFFER |
880 BRW_NEW_VERTEX_PROGRAM |
881 BRW_NEW_VS_PROG_DATA,
882 },
883 .emit = brw_update_texture_surfaces,
884 };
885
886 void
887 brw_upload_ubo_surfaces(struct brw_context *brw,
888 struct gl_shader *shader,
889 struct brw_stage_state *stage_state,
890 struct brw_stage_prog_data *prog_data,
891 bool dword_pitch)
892 {
893 struct gl_context *ctx = &brw->ctx;
894
895 if (!shader)
896 return;
897
898 uint32_t *surf_offsets =
899 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
900
901 for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
902 struct gl_uniform_buffer_binding *binding;
903 struct intel_buffer_object *intel_bo;
904
905 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
906 intel_bo = intel_buffer_object(binding->BufferObject);
907 drm_intel_bo *bo =
908 intel_bufferobj_buffer(brw, intel_bo,
909 binding->Offset,
910 binding->BufferObject->Size - binding->Offset);
911
912 /* Because behavior for referencing outside of the binding's size in the
913 * glBindBufferRange case is undefined, we can just bind the whole buffer
914 * (which is what glBindBufferBase wants anyway) and still be correct.
915 */
916 brw_create_constant_surface(brw, bo, binding->Offset,
917 bo->size - binding->Offset,
918 &surf_offsets[i],
919 dword_pitch);
920 }
921
922 if (shader->NumUniformBlocks)
923 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
924 }
925
926 static void
927 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
928 {
929 struct gl_context *ctx = &brw->ctx;
930 /* _NEW_PROGRAM */
931 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
932
933 if (!prog)
934 return;
935
936 /* BRW_NEW_FS_PROG_DATA */
937 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
938 &brw->wm.base, &brw->wm.prog_data->base, true);
939 }
940
941 const struct brw_tracked_state brw_wm_ubo_surfaces = {
942 .dirty = {
943 .mesa = _NEW_PROGRAM,
944 .brw = BRW_NEW_BATCH |
945 BRW_NEW_FS_PROG_DATA |
946 BRW_NEW_UNIFORM_BUFFER,
947 },
948 .emit = brw_upload_wm_ubo_surfaces,
949 };
950
951 void
952 brw_upload_abo_surfaces(struct brw_context *brw,
953 struct gl_shader_program *prog,
954 struct brw_stage_state *stage_state,
955 struct brw_stage_prog_data *prog_data)
956 {
957 struct gl_context *ctx = &brw->ctx;
958 uint32_t *surf_offsets =
959 &stage_state->surf_offset[prog_data->binding_table.abo_start];
960
961 for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
962 struct gl_atomic_buffer_binding *binding =
963 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
964 struct intel_buffer_object *intel_bo =
965 intel_buffer_object(binding->BufferObject);
966 drm_intel_bo *bo = intel_bufferobj_buffer(
967 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
968
969 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
970 binding->Offset, BRW_SURFACEFORMAT_RAW,
971 bo->size - binding->Offset, 1, true);
972 }
973
974 if (prog->NumAtomicBuffers)
975 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
976 }
977
978 static void
979 brw_upload_wm_abo_surfaces(struct brw_context *brw)
980 {
981 struct gl_context *ctx = &brw->ctx;
982 /* _NEW_PROGRAM */
983 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
984
985 if (prog) {
986 /* BRW_NEW_FS_PROG_DATA */
987 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
988 &brw->wm.prog_data->base);
989 }
990 }
991
992 const struct brw_tracked_state brw_wm_abo_surfaces = {
993 .dirty = {
994 .mesa = _NEW_PROGRAM,
995 .brw = BRW_NEW_ATOMIC_BUFFER |
996 BRW_NEW_BATCH |
997 BRW_NEW_FS_PROG_DATA,
998 },
999 .emit = brw_upload_wm_abo_surfaces,
1000 };
1001
1002 static void
1003 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1004 {
1005 struct gl_context *ctx = &brw->ctx;
1006 /* _NEW_PROGRAM */
1007 struct gl_shader_program *prog =
1008 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1009
1010 if (prog) {
1011 /* BRW_NEW_CS_PROG_DATA */
1012 brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
1013 &brw->cs.prog_data->base);
1014 }
1015 }
1016
1017 const struct brw_tracked_state brw_cs_abo_surfaces = {
1018 .dirty = {
1019 .mesa = _NEW_PROGRAM,
1020 .brw = BRW_NEW_ATOMIC_BUFFER |
1021 BRW_NEW_BATCH |
1022 BRW_NEW_CS_PROG_DATA,
1023 },
1024 .emit = brw_upload_cs_abo_surfaces,
1025 };
1026
1027 static void
1028 brw_upload_cs_image_surfaces(struct brw_context *brw)
1029 {
1030 struct gl_context *ctx = &brw->ctx;
1031 /* _NEW_PROGRAM */
1032 struct gl_shader_program *prog =
1033 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1034
1035 if (prog) {
1036 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
1037 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1038 &brw->cs.base, &brw->cs.prog_data->base);
1039 }
1040 }
1041
1042 const struct brw_tracked_state brw_cs_image_surfaces = {
1043 .dirty = {
1044 .mesa = _NEW_PROGRAM,
1045 .brw = BRW_NEW_BATCH |
1046 BRW_NEW_CS_PROG_DATA |
1047 BRW_NEW_IMAGE_UNITS
1048 },
1049 .emit = brw_upload_cs_image_surfaces,
1050 };
1051
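/**
 * Return the SURFACE_STATE format to use for a shader image unit.  Write-only
 * access can use the format directly; otherwise the format may have to be
 * lowered to something typed surface reads can handle, or to RAW when the
 * texels are too wide for typed reads on this generation.
 */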
1052 static uint32_t
1053 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1054 {
1055 if (access == GL_WRITE_ONLY) {
1056 return brw_format_for_mesa_format(format);
1057 } else {
1058 /* Typed surface reads support a very limited subset of the shader
1059 * image formats. Translate the given format into the closest one the
1060 * hardware supports.
1061 */
1062 if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
1063 (_mesa_get_format_bytes(format) >= 8 &&
1064 (brw->gen == 7 && !brw->is_haswell)))
1065 return BRW_SURFACEFORMAT_RAW;
1066 else
1067 return brw_format_for_mesa_format(
1068 brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
1069 }
1070 }
1071
1072 static void
1073 update_default_image_param(struct brw_context *brw,
1074 struct gl_image_unit *u,
1075 unsigned surface_idx,
1076 struct brw_image_param *param)
1077 {
1078 memset(param, 0, sizeof(*param));
1079 param->surface_idx = surface_idx;
1080 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1081 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1082 * detailed explanation of these parameters.
1083 */
1084 param->swizzling[0] = 0xff;
1085 param->swizzling[1] = 0xff;
1086 }
1087
1088 static void
1089 update_buffer_image_param(struct brw_context *brw,
1090 struct gl_image_unit *u,
1091 unsigned surface_idx,
1092 struct brw_image_param *param)
1093 {
1094 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1095
1096 update_default_image_param(brw, u, surface_idx, param);
1097
1098 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1099 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1100 }
1101
1102 static void
1103 update_texture_image_param(struct brw_context *brw,
1104 struct gl_image_unit *u,
1105 unsigned surface_idx,
1106 struct brw_image_param *param)
1107 {
1108 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1109
1110 update_default_image_param(brw, u, surface_idx, param);
1111
1112 param->size[0] = minify(mt->logical_width0, u->Level);
1113 param->size[1] = minify(mt->logical_height0, u->Level);
1114 param->size[2] = (!u->Layered ? 1 :
1115 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1116 u->TexObj->Target == GL_TEXTURE_3D ?
1117 minify(mt->logical_depth0, u->Level) :
1118 mt->logical_depth0);
1119
1120 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1121 &param->offset[0],
1122 &param->offset[1]);
1123
1124 param->stride[0] = mt->cpp;
1125 param->stride[1] = mt->pitch / mt->cpp;
1126 param->stride[2] =
1127 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1128 param->stride[3] =
1129 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1130
1131 if (mt->tiling == I915_TILING_X) {
1132 /* An X tile is a rectangular block of 512x8 bytes. */
1133 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1134 param->tiling[1] = _mesa_logbase2(8);
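      /* E.g. (illustrative) with a 4 cpp format this yields
       * tiling[0] = log2(512 / 4) = 7 and tiling[1] = log2(8) = 3,
       * i.e. 128x8-pixel tiles.
       */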
1135
1136 if (brw->has_swizzling) {
1137 /* Right shifts required to swizzle bits 9 and 10 of the memory
1138 * address with bit 6.
1139 */
1140 param->swizzling[0] = 3;
1141 param->swizzling[1] = 4;
1142 }
1143 } else if (mt->tiling == I915_TILING_Y) {
1144 /* The layout of a Y-tiled surface in memory isn't fundamentally
1145 * different from the layout of an X-tiled surface; we simply pretend
1146 * that the surface is broken up into a number of smaller 16Bx32 tiles,
1147 * each one arranged in X-major order just as is the case for X-tiling.
1148 */
1149 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1150 param->tiling[1] = _mesa_logbase2(32);
1151
1152 if (brw->has_swizzling) {
1153 /* Right shift required to swizzle bit 9 of the memory address with
1154 * bit 6.
1155 */
1156 param->swizzling[0] = 3;
1157 }
1158 }
1159
1160 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1161 * address calculation algorithm (emit_address_calculation() in
1162 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1163 * modulus equal to the LOD.
1164 */
1165 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1166 0);
1167 }
1168
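/**
 * Set up the surface state and brw_image_param for one shader image unit.
 * Buffer textures get a buffer surface, textures whose format has to fall
 * back to RAW get an untyped buffer surface over the whole miptree BO, and
 * everything else gets a regular texture surface; invalid units get a null
 * surface.
 */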
1169 static void
1170 update_image_surface(struct brw_context *brw,
1171 struct gl_image_unit *u,
1172 GLenum access,
1173 unsigned surface_idx,
1174 uint32_t *surf_offset,
1175 struct brw_image_param *param)
1176 {
1177 if (u->_Valid) {
1178 struct gl_texture_object *obj = u->TexObj;
1179 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1180
1181 if (obj->Target == GL_TEXTURE_BUFFER) {
1182 struct intel_buffer_object *intel_obj =
1183 intel_buffer_object(obj->BufferObject);
1184 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1185 _mesa_get_format_bytes(u->_ActualFormat));
1186
1187 brw->vtbl.emit_buffer_surface_state(
1188 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1189 format, intel_obj->Base.Size / texel_size, texel_size,
1190 access != GL_READ_ONLY);
1191
1192 update_buffer_image_param(brw, u, surface_idx, param);
1193
1194 } else {
1195 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1196 struct intel_mipmap_tree *mt = intel_obj->mt;
1197
1198 if (format == BRW_SURFACEFORMAT_RAW) {
1199 brw->vtbl.emit_buffer_surface_state(
1200 brw, surf_offset, mt->bo, mt->offset,
1201 format, mt->bo->size - mt->offset, 1 /* pitch */,
1202 access != GL_READ_ONLY);
1203
1204 } else {
1205 const unsigned min_layer = obj->MinLayer + u->_Layer;
1206 const unsigned min_level = obj->MinLevel + u->Level;
1207 const unsigned num_layers = (!u->Layered ? 1 :
1208 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1209 mt->logical_depth0);
1210 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1211 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1212 GL_TEXTURE_2D_ARRAY : obj->Target);
1213
1214 brw->vtbl.emit_texture_surface_state(
1215 brw, mt, target,
1216 min_layer, min_layer + num_layers,
1217 min_level, min_level + 1,
1218 format, SWIZZLE_XYZW,
1219 surf_offset, access != GL_READ_ONLY, false);
1220 }
1221
1222 update_texture_image_param(brw, u, surface_idx, param);
1223 }
1224
1225 } else {
1226 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1227 update_default_image_param(brw, u, surface_idx, param);
1228 }
1229 }
1230
1231 void
1232 brw_upload_image_surfaces(struct brw_context *brw,
1233 struct gl_shader *shader,
1234 struct brw_stage_state *stage_state,
1235 struct brw_stage_prog_data *prog_data)
1236 {
1237 struct gl_context *ctx = &brw->ctx;
1238
1239 if (shader && shader->NumImages) {
1240 for (unsigned i = 0; i < shader->NumImages; i++) {
1241 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1242 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1243
1244 update_image_surface(brw, u, shader->ImageAccess[i],
1245 surf_idx,
1246 &stage_state->surf_offset[surf_idx],
1247 &prog_data->image_param[i]);
1248 }
1249
1250 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1251 }
1252 }
1253
1254 static void
1255 brw_upload_wm_image_surfaces(struct brw_context *brw)
1256 {
1257 struct gl_context *ctx = &brw->ctx;
1258 /* BRW_NEW_FRAGMENT_PROGRAM */
1259 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
1260
1261 if (prog) {
1262 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
1263 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1264 &brw->wm.base, &brw->wm.prog_data->base);
1265 }
1266 }
1267
1268 const struct brw_tracked_state brw_wm_image_surfaces = {
1269 .dirty = {
1270 .brw = BRW_NEW_BATCH |
1271 BRW_NEW_FRAGMENT_PROGRAM |
1272 BRW_NEW_FS_PROG_DATA |
1273 BRW_NEW_IMAGE_UNITS
1274 },
1275 .emit = brw_upload_wm_image_surfaces,
1276 };
1277
1278 void
1279 gen4_init_vtable_surface_functions(struct brw_context *brw)
1280 {
1281 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1282 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1283 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1284 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1285 }