src/mesa/drivers/dri/i965/brw_wm_surface_state.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

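/* Note (added commentary, not from the original source): the pre-Gen7
 * SURFACE_STATE layout emitted by this file only distinguishes 1x and 4x in
 * its "Number of Multisamples" field, and 4x appears to be the only
 * multisampled mode supported by the hardware that uses this path, so any
 * num_samples > 1 is encoded as 4x here.
 */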
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

void
brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
                      bool is_render_target,
                      unsigned *width, unsigned *height,
                      unsigned *pitch, uint32_t *tiling, unsigned *format)
{
   static const unsigned halign_stencil = 8;

   /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
    * there are half as many rows.
    * In addition, mip-levels are accessed manually by the program and
    * therefore the surface is set up to cover all the mip-levels for one
    * slice.  (Hardware is still used to access individual slices.)
    */
   *tiling = I915_TILING_Y;
   *pitch = mt->pitch * 2;
   *width = ALIGN(mt->total_width, halign_stencil) * 2;
   *height = (mt->total_height / mt->physical_depth0) / 2;
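   /* Illustrative example (added commentary): a single-slice stencil miptree
    * with total_width = 68, total_height = 64 and pitch = 128 bytes is
    * advertised to the hardware as a Y-tiled surface with pitch = 256,
    * width = ALIGN(68, 8) * 2 = 144 and height = (64 / 1) / 2 = 32.
    */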

   if (is_render_target) {
      *format = BRW_SURFACEFORMAT_R8_UINT;
   }
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0.  Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0.  This allows the implementation
    * to use an RGBA texture for any of these formats without leaking
    * any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

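   /* Added commentary: the application's swizzle (t->_Swizzle) selects which
    * entry of the format swizzle computed above feeds each channel.  Purely
    * illustrative example: a depth texture with DepthMode GL_LUMINANCE and a
    * texture swizzle of (GREEN, GREEN, GREEN, ONE) returns
    * MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE), since
    * swizzles[SWIZZLE_Y] is SWIZZLE_X and swizzles[SWIZZLE_ONE] is
    * SWIZZLE_ONE here.
    */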
   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

static void
gen4_emit_buffer_surface_state(struct brw_context *brw,
                               uint32_t *out_offset,
                               drm_intel_bo *bo,
                               unsigned buffer_offset,
                               unsigned surface_format,
                               unsigned buffer_size,
                               unsigned pitch,
                               bool rw)
{
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    6 * 4, 32, out_offset);
   memset(surf, 0, 6 * 4);

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
   surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
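   /* Added commentary: there is no single "buffer size" field in this
    * SURFACE_STATE layout; the size passed in is split across the Width
    * (bits 6:0), Height (bits 19:7) and Depth (bits 26:20) fields below.
    * Illustrative example: buffer_size = 1000 ends up as width 104,
    * height 7, depth 0.
    */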
   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;

   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache.  It is mapped in hardware to the sampler cache."
    */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
                              bo, buffer_offset,
                              I915_GEM_DOMAIN_SAMPLER,
                              (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
   }
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   drm_intel_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
   }

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
                                       tObj->BufferOffset,
                                       brw_format,
                                       size / texel_size,
                                       texel_size,
                                       false /* rw */);
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;

   /* BRW_NEW_TEXTURE_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, surf_offset);

   uint32_t tex_format = translate_tex_format(brw, mt->format,
                                              sampler->sRGBDecode);

   if (for_gather) {
      /* Sandybridge's gather4 message is broken for integer formats.
       * To work around this, we pretend the surface is UNORM for
       * 8 or 16-bit formats, and emit shader instructions to recover
       * the real INT/UINT value.  For 32-bit formats, we pretend
       * the surface is FLOAT, and simply reinterpret the resulting
       * bits.
       */
      switch (tex_format) {
      case BRW_SURFACEFORMAT_R8_SINT:
      case BRW_SURFACEFORMAT_R8_UINT:
         tex_format = BRW_SURFACEFORMAT_R8_UNORM;
         break;

      case BRW_SURFACEFORMAT_R16_SINT:
      case BRW_SURFACEFORMAT_R16_UINT:
         tex_format = BRW_SURFACEFORMAT_R16_UNORM;
         break;

      case BRW_SURFACEFORMAT_R32_SINT:
      case BRW_SURFACEFORMAT_R32_UINT:
         tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
         break;

      default:
         break;
      }
   }

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              tex_format << BRW_SURFACE_FORMAT_SHIFT);

   surf[1] = mt->bo->offset64 + mt->offset; /* reloc */

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
              SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));

   surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will
 * be read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;
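   /* Illustrative note (added commentary): with dword_pitch == false the
    * buffer is presented to the hardware as an array of 16-byte (vec4)
    * elements, so a 256-byte constant buffer yields 16 elements; with
    * dword_pitch == true the same buffer is treated as 64 scalar dwords.
    */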

   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
                                       elements, stride, false);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                             offset_bytes,
                                             buffer_obj->Size - offset_bytes);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
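   /* Added commentary: like the buffer surfaces above, the entry count is
    * packed into the Width/Height/Depth fields: bits 6:0 in Width,
    * bits 19:7 in Height and bits 26:20 in Depth, for 27 usable bits total.
    */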
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset64 + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;

   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
                             stage_state, prog_data, true);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
brw_emit_null_surface_state(struct brw_context *brw,
                            unsigned width,
                            unsigned height,
                            unsigned samples,
                            uint32_t *out_offset)
{
   /* From the Sandybridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound.  When a write message is generated to a null surface, no
    *     actual surface is written to.  When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros.  Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported.  All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer's corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);

   if (samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
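      /* Illustrative example (added commentary): for a 1920x1080
       * multisampled framebuffer this works out to width_in_tiles = 120 and
       * height_in_tiles = 68, i.e. a (120 + 68 - 1) * 4096 byte = ~748 KiB
       * scratch buffer instead of a full-size dummy render target.
       */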
      unsigned width_in_tiles = ALIGN(width, 16) / 16;
      unsigned height_in_tiles = ALIGN(height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state = brw_get_surface_num_multisamples(samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset64 : 0;
   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandybridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              *out_offset + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */
   uint32_t surf_index =
      brw->wm.prog_data->binding_table.render_target_start + unit;

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[surf_index]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              mt->bo->offset64 + mt->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Also disable writes to the alpha component when the renderbuffer
       * is XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.base.surf_offset[surf_index] + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(brw,
               ctx->DrawBuffer->_ColorDrawBuffers[i],
               ctx->DrawBuffer->MaxNumLayers > 0, i);
         } else {
            const uint32_t surf_index =
               brw->wm.prog_data->binding_table.render_target_start + i;

            brw->vtbl.emit_null_surface_state(
               brw, fb->Width, fb->Height, fb->Visual.samples,
               &brw->wm.base.surf_offset[surf_index]);
         }
      }
   } else {
      const uint32_t surf_index =
         brw->wm.prog_data->binding_table.render_target_start;

      brw->vtbl.emit_null_surface_state(
         brw, fb->Width, fb->Height, fb->Visual.samples,
         &brw->wm.base.surf_offset[surf_index]);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
   },
   .emit = brw_update_renderbuffer_surfaces,
};

static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.texture_start;

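   /* Added commentary: _mesa_fls() returns one past the index of the highest
    * set bit, so the loop below walks every sampler slot up to and including
    * the last one the program uses, zeroing the entries for unused slots.
    */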
   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._Current) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s,
                                             for_gather);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);

   /* Emit an alternate set of surface state for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (brw->gen < 8) {
      if (vs && vs->UsesGather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
      if (gs && gs->UsesGather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
      if (fs && fs->UsesGather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
   }

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data,
                        bool dword_pitch)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, intel_bo,
                                binding->Offset,
                                binding->BufferObject->Size - binding->Offset);

      /* Because behavior for references outside of the binding's size in the
       * glBindBufferRange case is undefined, we can just bind the whole
       * buffer (as glBindBufferBase would) and still be a correct
       * implementation.
       */
      brw_create_constant_surface(brw, bo, binding->Offset,
                                  bo->size - binding->Offset,
                                  &surf_offsets[i],
                                  dword_pitch);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (!prog)
      return;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base, true);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

void
brw_upload_abo_surfaces(struct brw_context *brw,
                        struct gl_shader_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   for (int i = 0; i < prog->NumAtomicBuffers; i++) {
      struct gl_atomic_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
      struct intel_buffer_object *intel_bo =
         intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(
         brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);

      brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
                                   bo->size - binding->Offset,
                                   &surf_offsets[i], true);
   }

   if (prog->NumAtomicBuffers)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
                              &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
   brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
}