i965/wm: Surface state overrides for configuring w-tiled as y-tiled
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104 void
105 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
106 bool is_render_target,
107 unsigned *width, unsigned *height,
108 unsigned *pitch, uint32_t *tiling, unsigned *format)
109 {
110 static const unsigned halign_stencil = 8;
111
112    /* In Y-tiling, a row is twice as wide as in W-tiling, and consequently
113     * there are half as many rows.
114     * In addition, mip-levels are accessed manually by the program, and
115     * therefore the surface is set up to cover all the mip-levels for one slice.
116     * (Hardware is still used to access individual slices.)
117     */
118 *tiling = I915_TILING_Y;
119 *pitch = mt->pitch * 2;
120 *width = ALIGN(mt->total_width, halign_stencil) * 2;
121 *height = (mt->total_height / mt->physical_depth0) / 2;
122
123 if (is_render_target) {
124 *format = BRW_SURFACEFORMAT_R8_UINT;
125 }
126 }
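/* Illustrative only: a rough sketch of the retiling math above, using made-up
 * miptree values (not taken from any real surface).  Assume a single-slice
 * stencil miptree with total_width = 70, total_height = 64,
 * physical_depth0 = 1 and a W-tiled pitch of 128 bytes:
 *
 *   pitch  = 128 * 2          = 256
 *   width  = ALIGN(70, 8) * 2 = 72 * 2 = 144
 *   height = (64 / 1) / 2     = 32
 *
 * i.e. the same bytes are presented to the hardware as a Y-tiled surface
 * that is twice as wide and half as tall as the W-tiled stencil layout.
 */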
127
128
129 /**
130 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
131 * swizzling.
132 */
133 int
134 brw_get_texture_swizzle(const struct gl_context *ctx,
135 const struct gl_texture_object *t)
136 {
137 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
138
139 int swizzles[SWIZZLE_NIL + 1] = {
140 SWIZZLE_X,
141 SWIZZLE_Y,
142 SWIZZLE_Z,
143 SWIZZLE_W,
144 SWIZZLE_ZERO,
145 SWIZZLE_ONE,
146 SWIZZLE_NIL
147 };
148
149 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
150 img->_BaseFormat == GL_DEPTH_STENCIL) {
151 GLenum depth_mode = t->DepthMode;
152
153 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
154 * with depth component data specified with a sized internal format.
155 * Otherwise, it's left at the old default, GL_LUMINANCE.
156 */
157 if (_mesa_is_gles3(ctx) &&
158 img->InternalFormat != GL_DEPTH_COMPONENT &&
159 img->InternalFormat != GL_DEPTH_STENCIL) {
160 depth_mode = GL_RED;
161 }
162
163 switch (depth_mode) {
164 case GL_ALPHA:
165 swizzles[0] = SWIZZLE_ZERO;
166 swizzles[1] = SWIZZLE_ZERO;
167 swizzles[2] = SWIZZLE_ZERO;
168 swizzles[3] = SWIZZLE_X;
169 break;
170 case GL_LUMINANCE:
171 swizzles[0] = SWIZZLE_X;
172 swizzles[1] = SWIZZLE_X;
173 swizzles[2] = SWIZZLE_X;
174 swizzles[3] = SWIZZLE_ONE;
175 break;
176 case GL_INTENSITY:
177 swizzles[0] = SWIZZLE_X;
178 swizzles[1] = SWIZZLE_X;
179 swizzles[2] = SWIZZLE_X;
180 swizzles[3] = SWIZZLE_X;
181 break;
182 case GL_RED:
183 swizzles[0] = SWIZZLE_X;
184 swizzles[1] = SWIZZLE_ZERO;
185 swizzles[2] = SWIZZLE_ZERO;
186 swizzles[3] = SWIZZLE_ONE;
187 break;
188 }
189 }
190
191 /* If the texture's format is alpha-only, force R, G, and B to
192 * 0.0. Similarly, if the texture's format has no alpha channel,
193 * force the alpha value read to 1.0. This allows for the
194 * implementation to use an RGBA texture for any of these formats
195 * without leaking any unexpected values.
196 */
197 switch (img->_BaseFormat) {
198 case GL_ALPHA:
199 swizzles[0] = SWIZZLE_ZERO;
200 swizzles[1] = SWIZZLE_ZERO;
201 swizzles[2] = SWIZZLE_ZERO;
202 break;
203 case GL_RED:
204 case GL_RG:
205 case GL_RGB:
206 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
207 swizzles[3] = SWIZZLE_ONE;
208 break;
209 }
210
211 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
212 swizzles[GET_SWZ(t->_Swizzle, 1)],
213 swizzles[GET_SWZ(t->_Swizzle, 2)],
214 swizzles[GET_SWZ(t->_Swizzle, 3)]);
215 }
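/* Illustrative only: one possible composition of the table above with the
 * application swizzle (values are hypothetical).  For a GL_DEPTH_COMPONENT
 * texture with DepthMode GL_LUMINANCE, the table is { X, X, X, ONE }.  If
 * the application also sets a texture swizzle of (ALPHA, RED, GREEN, BLUE),
 * i.e. selectors (3, 0, 1, 2), then
 *
 *   MAKE_SWIZZLE4(swizzles[3], swizzles[0], swizzles[1], swizzles[2])
 *     = (ONE, X, X, X)
 *
 * so the red output reads a constant 1.0 and the other channels read the
 * depth value.
 */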
216
217 static void
218 gen4_emit_buffer_surface_state(struct brw_context *brw,
219 uint32_t *out_offset,
220 drm_intel_bo *bo,
221 unsigned buffer_offset,
222 unsigned surface_format,
223 unsigned buffer_size,
224 unsigned pitch,
225 unsigned mocs,
226 bool rw)
227 {
228 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
229 6 * 4, 32, out_offset);
230 memset(surf, 0, 6 * 4);
231
232 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
233 surface_format << BRW_SURFACE_FORMAT_SHIFT |
234 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
235 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
236 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
237 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
238 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
239 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
240
241 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
242 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
243 * physical cache. It is mapped in hardware to the sampler cache."
244 */
245 if (bo) {
246 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
247 bo, buffer_offset,
248 I915_GEM_DOMAIN_SAMPLER,
249 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
250 }
251 }
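/* Illustrative only: how an element count is split across the Width/Height/
 * Depth fields by the packing above (hypothetical value, not from any real
 * buffer).  For buffer_size = 1000 elements:
 *
 *   width  = 1000 & 0x7f          = 104   (bits  6:0)
 *   height = (1000 >> 7) & 0x1fff = 7     (bits 19:7)
 *   depth  = (1000 >> 20) & 0x7f  = 0     (bits 26:20)
 *
 * so those are the values written into the three surface-state fields.
 */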
252
253 void
254 brw_update_buffer_texture_surface(struct gl_context *ctx,
255 unsigned unit,
256 uint32_t *surf_offset)
257 {
258 struct brw_context *brw = brw_context(ctx);
259 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
260 struct intel_buffer_object *intel_obj =
261 intel_buffer_object(tObj->BufferObject);
262 uint32_t size = tObj->BufferSize;
263 drm_intel_bo *bo = NULL;
264 mesa_format format = tObj->_BufferObjectFormat;
265 uint32_t brw_format = brw_format_for_mesa_format(format);
266 int texel_size = _mesa_get_format_bytes(format);
267
268 if (intel_obj) {
269 size = MIN2(size, intel_obj->Base.Size);
270 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
271 }
272
273 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
274 _mesa_problem(NULL, "bad format %s for texture buffer\n",
275 _mesa_get_format_name(format));
276 }
277
278 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
279 tObj->BufferOffset,
280 brw_format,
281 size / texel_size,
282 texel_size,
283 0, /* mocs */
284 false /* rw */);
285 }
286
287 static void
288 brw_update_texture_surface(struct gl_context *ctx,
289 unsigned unit,
290 uint32_t *surf_offset,
291 bool for_gather)
292 {
293 struct brw_context *brw = brw_context(ctx);
294 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
295 struct intel_texture_object *intelObj = intel_texture_object(tObj);
296 struct intel_mipmap_tree *mt = intelObj->mt;
297 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
298 uint32_t *surf;
299
300 /* BRW_NEW_UNIFORM_BUFFER */
301 if (tObj->Target == GL_TEXTURE_BUFFER) {
302 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
303 return;
304 }
305
306 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
307 6 * 4, 32, surf_offset);
308
309 uint32_t tex_format = translate_tex_format(brw, mt->format,
310 sampler->sRGBDecode);
311
312 if (for_gather) {
313 /* Sandybridge's gather4 message is broken for integer formats.
314 * To work around this, we pretend the surface is UNORM for
315 * 8 or 16-bit formats, and emit shader instructions to recover
316 * the real INT/UINT value. For 32-bit formats, we pretend
317 * the surface is FLOAT, and simply reinterpret the resulting
318 * bits.
319 */
320 switch (tex_format) {
321 case BRW_SURFACEFORMAT_R8_SINT:
322 case BRW_SURFACEFORMAT_R8_UINT:
323 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
324 break;
325
326 case BRW_SURFACEFORMAT_R16_SINT:
327 case BRW_SURFACEFORMAT_R16_UINT:
328 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
329 break;
330
331 case BRW_SURFACEFORMAT_R32_SINT:
332 case BRW_SURFACEFORMAT_R32_UINT:
333 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
334 break;
335
336 default:
337 break;
338 }
339 }
340
341 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
342 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
343 BRW_SURFACE_CUBEFACE_ENABLES |
344 tex_format << BRW_SURFACE_FORMAT_SHIFT);
345
346 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
347
348 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
349 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
350 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
351
352 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
353 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
354 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
355
356 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
357 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
358
359 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
360
361 /* Emit relocation to surface contents */
362 drm_intel_bo_emit_reloc(brw->batch.bo,
363 *surf_offset + 4,
364 mt->bo,
365 surf[1] - mt->bo->offset64,
366 I915_GEM_DOMAIN_SAMPLER, 0);
367 }
368
369 /**
370 * Create the constant buffer surface. Vertex/fragment shader constants will be
371 * read from this buffer with Data Port Read instructions/messages.
372 */
373 void
374 brw_create_constant_surface(struct brw_context *brw,
375 drm_intel_bo *bo,
376 uint32_t offset,
377 uint32_t size,
378 uint32_t *out_offset,
379 bool dword_pitch)
380 {
381 uint32_t stride = dword_pitch ? 4 : 16;
382 uint32_t elements = ALIGN(size, stride) / stride;
383
384 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
385 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
386 elements, stride, 0, false);
387 }
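/* Illustrative only: the element math above for a hypothetical size.  With
 * dword_pitch = true (stride 4) and size = 100 bytes:
 *
 *   elements = ALIGN(100, 4) / 4   = 25
 *
 * With dword_pitch = false (stride 16) and the same size:
 *
 *   elements = ALIGN(100, 16) / 16 = 7
 */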
388
389 /**
390 * Set up a binding table entry for use by stream output logic (transform
391 * feedback).
392 *
393  * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
394 */
395 void
396 brw_update_sol_surface(struct brw_context *brw,
397 struct gl_buffer_object *buffer_obj,
398 uint32_t *out_offset, unsigned num_vector_components,
399 unsigned stride_dwords, unsigned offset_dwords)
400 {
401 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
402 uint32_t offset_bytes = 4 * offset_dwords;
403 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
404 offset_bytes,
405 buffer_obj->Size - offset_bytes);
406 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
407 out_offset);
408 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
409 size_t size_dwords = buffer_obj->Size / 4;
410 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
411
412 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
413 * too big to map using a single binding table entry?
414 */
415 assert((size_dwords - offset_dwords) / stride_dwords
416 <= BRW_MAX_NUM_BUFFER_ENTRIES);
417
418 if (size_dwords > offset_dwords + num_vector_components) {
419 /* There is room for at least 1 transform feedback output in the buffer.
420 * Compute the number of additional transform feedback outputs the
421 * buffer has room for.
422 */
423 buffer_size_minus_1 =
424 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
425 } else {
426 /* There isn't even room for a single transform feedback output in the
427 * buffer. We can't configure the binding table entry to prevent output
428 * entirely; we'll have to rely on the geometry shader to detect
429 * overflow. But to minimize the damage in case of a bug, set up the
430 * binding table entry to just allow a single output.
431 */
432 buffer_size_minus_1 = 0;
433 }
434 width = buffer_size_minus_1 & 0x7f;
435 height = (buffer_size_minus_1 & 0xfff80) >> 7;
436 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
437
438 switch (num_vector_components) {
439 case 1:
440 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
441 break;
442 case 2:
443 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
444 break;
445 case 3:
446 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
447 break;
448 case 4:
449 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
450 break;
451 default:
452 assert(!"Invalid vector size for transform feedback output");
453 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
454 break;
455 }
456
457 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
458 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
459 surface_format << BRW_SURFACE_FORMAT_SHIFT |
460 BRW_SURFACE_RC_READ_WRITE;
461 surf[1] = bo->offset64 + offset_bytes; /* reloc */
462 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
463 height << BRW_SURFACE_HEIGHT_SHIFT);
464 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
465 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
466 surf[4] = 0;
467 surf[5] = 0;
468
469 /* Emit relocation to surface contents. */
470 drm_intel_bo_emit_reloc(brw->batch.bo,
471 *out_offset + 4,
472 bo, offset_bytes,
473 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
474 }
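/* Illustrative only: the sizing math above with made-up transform feedback
 * parameters.  Assume a 4096-byte buffer (size_dwords = 1024),
 * offset_dwords = 0, num_vector_components = 4 (a vec4 output) and
 * stride_dwords = 16 (a 64-byte vertex stride):
 *
 *   buffer_size_minus_1 = (1024 - 0 - 4) / 16 = 63
 *   width = 63, height = 0, depth = 0
 *   pitch_minus_1 = 4 * 16 - 1 = 63
 *
 * i.e. there is room for 64 vec4 outputs (the first one plus 63 more)
 * before writes fall outside the surface.
 */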
475
476 /* Creates a new WM constant buffer reflecting the current fragment program's
477 * constants, if needed by the fragment program.
478 *
479 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
480 * state atom.
481 */
482 static void
483 brw_upload_wm_pull_constants(struct brw_context *brw)
484 {
485 struct gl_context *ctx = &brw->ctx;
486 /* BRW_NEW_FRAGMENT_PROGRAM */
487 struct brw_fragment_program *fp =
488 (struct brw_fragment_program *) brw->fragment_program;
489 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
490 const int size = brw->wm.prog_data->base.nr_pull_params * sizeof(float);
491 const int surf_index =
492 brw->wm.prog_data->base.binding_table.pull_constants_start;
493 unsigned int i;
494
495 _mesa_load_state_parameters(ctx, params);
496
497 /* CACHE_NEW_WM_PROG */
498 if (brw->wm.prog_data->base.nr_pull_params == 0) {
499 if (brw->wm.base.surf_offset[surf_index]) {
500 brw->wm.base.surf_offset[surf_index] = 0;
501 brw->state.dirty.brw |= BRW_NEW_SURFACES;
502 }
503 return;
504 }
505
506 /* _NEW_PROGRAM_CONSTANTS */
507 drm_intel_bo *const_bo = NULL;
508 uint32_t const_offset;
509 float *constants = intel_upload_space(brw, size, 64,
510 &const_bo, &const_offset);
511 for (i = 0; i < brw->wm.prog_data->base.nr_pull_params; i++) {
512 constants[i] = *brw->wm.prog_data->base.pull_param[i];
513 }
514
515 brw_create_constant_surface(brw, const_bo, const_offset, size,
516 &brw->wm.base.surf_offset[surf_index],
517 true);
518 drm_intel_bo_unreference(const_bo);
519
520 brw->state.dirty.brw |= BRW_NEW_SURFACES;
521 }
522
523 const struct brw_tracked_state brw_wm_pull_constants = {
524 .dirty = {
525 .mesa = (_NEW_PROGRAM_CONSTANTS),
526 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
527 .cache = CACHE_NEW_WM_PROG,
528 },
529 .emit = brw_upload_wm_pull_constants,
530 };
531
532 static void
533 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
534 {
535 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
536 * Notes):
537 *
538 * A null surface will be used in instances where an actual surface is
539 * not bound. When a write message is generated to a null surface, no
540 * actual surface is written to. When a read message (including any
541 * sampling engine message) is generated to a null surface, the result
542 * is all zeros. Note that a null surface type is allowed to be used
543  *     with all messages, even if it is not specifically indicated as
544 * supported. All of the remaining fields in surface state are ignored
545 * for null surfaces, with the following exceptions:
546 *
547 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
548 * depth buffer’s corresponding state for all render target surfaces,
549 * including null.
550 *
551 * - Surface Format must be R8G8B8A8_UNORM.
552 */
553 struct gl_context *ctx = &brw->ctx;
554 uint32_t *surf;
555 unsigned surface_type = BRW_SURFACE_NULL;
556 drm_intel_bo *bo = NULL;
557 unsigned pitch_minus_1 = 0;
558 uint32_t multisampling_state = 0;
559 uint32_t surf_index =
560 brw->wm.prog_data->binding_table.render_target_start + unit;
561
562 /* _NEW_BUFFERS */
563 const struct gl_framebuffer *fb = ctx->DrawBuffer;
564
565 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
566 &brw->wm.base.surf_offset[surf_index]);
567
568 if (fb->Visual.samples > 1) {
569 /* On Gen6, null render targets seem to cause GPU hangs when
570  * multisampling. So work around this problem by rendering into a dummy
571 * color buffer.
572 *
573 * To decrease the amount of memory needed by the workaround buffer, we
574 * set its pitch to 128 bytes (the width of a Y tile). This means that
575 * the amount of memory needed for the workaround buffer is
576 * (width_in_tiles + height_in_tiles - 1) tiles.
577 *
578 * Note that since the workaround buffer will be interpreted by the
579 * hardware as an interleaved multisampled buffer, we need to compute
580 * width_in_tiles and height_in_tiles by dividing the width and height
581 * by 16 rather than the normal Y-tile size of 32.
582 */
583 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
584 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
585 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
586 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
587 size_needed);
588 bo = brw->wm.multisampled_null_render_target_bo;
589 surface_type = BRW_SURFACE_2D;
590 pitch_minus_1 = 127;
591 multisampling_state =
592 brw_get_surface_num_multisamples(fb->Visual.samples);
593 }
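/* Illustrative only: the workaround-buffer sizing above for a hypothetical
 * 1920x1080 multisampled framebuffer:
 *
 *   width_in_tiles  = ALIGN(1920, 16) / 16 = 120
 *   height_in_tiles = ALIGN(1080, 16) / 16 = 68
 *   size_needed     = (120 + 68 - 1) * 4096 = 765952 bytes (~748 KiB)
 *
 * versus roughly 8 MB for a full-size dummy color buffer, which is the
 * saving the 128-byte-pitch trick is after.
 */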
594
595 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
596 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
597 if (brw->gen < 6) {
598 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
599 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
600 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
601 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
602 }
603 surf[1] = bo ? bo->offset64 : 0;
604 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
605 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
606
607 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
608 * Notes):
609 *
610 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
611 */
612 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
613 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
614 surf[4] = multisampling_state;
615 surf[5] = 0;
616
617 if (bo) {
618 drm_intel_bo_emit_reloc(brw->batch.bo,
619 brw->wm.base.surf_offset[surf_index] + 4,
620 bo, 0,
621 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
622 }
623 }
624
625 /**
626 * Sets up a surface state structure to point at the given region.
627 * While it is only used for the front/back buffer currently, it should be
628  * usable for further buffers when doing ARB_draw_buffers support.
629 */
630 static void
631 brw_update_renderbuffer_surface(struct brw_context *brw,
632 struct gl_renderbuffer *rb,
633 bool layered,
634 unsigned int unit)
635 {
636 struct gl_context *ctx = &brw->ctx;
637 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
638 struct intel_mipmap_tree *mt = irb->mt;
639 uint32_t *surf;
640 uint32_t tile_x, tile_y;
641 uint32_t format = 0;
642 /* _NEW_BUFFERS */
643 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
644 uint32_t surf_index =
645 brw->wm.prog_data->binding_table.render_target_start + unit;
646
647 assert(!layered);
648
649 if (rb->TexImage && !brw->has_surface_tile_offset) {
650 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
651
652 if (tile_x != 0 || tile_y != 0) {
653 /* Original gen4 hardware couldn't draw to a non-tile-aligned
654  * destination in a miptree unless you actually set up your renderbuffer
655 * as a miptree and used the fragile lod/array_index/etc. controls to
656 * select the image. So, instead, we just make a new single-level
657 * miptree and render into that.
658 */
659 intel_renderbuffer_move_to_temp(brw, irb, false);
660 mt = irb->mt;
661 }
662 }
663
664 intel_miptree_used_for_rendering(irb->mt);
665
666 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
667 &brw->wm.base.surf_offset[surf_index]);
668
669 format = brw->render_target_format[rb_format];
670 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
671 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
672 __FUNCTION__, _mesa_get_format_name(rb_format));
673 }
674
675 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
676 format << BRW_SURFACE_FORMAT_SHIFT);
677
678 /* reloc */
679 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
680 mt->bo->offset64);
681
682 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
683 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
684
685 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
686 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
687
688 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
689
690 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
691    /* Note that these fields lack the low bits of the offsets (x is stored in
692     * units of 4 pixels, y in units of 2 rows), so misaligned offsets would be
693     * silently truncated.
694 assert(tile_x % 4 == 0);
695 assert(tile_y % 2 == 0);
696 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
697 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
698 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
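/* Illustrative only: with hypothetical tile offsets tile_x = 32 and
 * tile_y = 16, the fields above are written as 32 / 4 = 8 and 16 / 2 = 8.
 * An offset that is not a multiple of 4 (x) or 2 (y) cannot be represented,
 * which is what the asserts above guard against.
 */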
699
700 if (brw->gen < 6) {
701 /* _NEW_COLOR */
702 if (!ctx->Color.ColorLogicOpEnabled &&
703 (ctx->Color.BlendEnabled & (1 << unit)))
704 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
705
706 if (!ctx->Color.ColorMask[unit][0])
707 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
708 if (!ctx->Color.ColorMask[unit][1])
709 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
710 if (!ctx->Color.ColorMask[unit][2])
711 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
712
713    /* Disable writes to the alpha component when the renderbuffer has no
714     * alpha bits (XRGB) or alpha writes are masked off.
715     */
716 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
717 !ctx->Color.ColorMask[unit][3]) {
718 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
719 }
720 }
721
722 drm_intel_bo_emit_reloc(brw->batch.bo,
723 brw->wm.base.surf_offset[surf_index] + 4,
724 mt->bo,
725 surf[1] - mt->bo->offset64,
726 I915_GEM_DOMAIN_RENDER,
727 I915_GEM_DOMAIN_RENDER);
728 }
729
730 /**
731 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
732 */
733 static void
734 brw_update_renderbuffer_surfaces(struct brw_context *brw)
735 {
736 struct gl_context *ctx = &brw->ctx;
737 GLuint i;
738
739 /* _NEW_BUFFERS | _NEW_COLOR */
740 /* Update surfaces for drawing buffers */
741 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
742 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
743 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
744 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
745 ctx->DrawBuffer->MaxNumLayers > 0, i);
746 } else {
747 brw->vtbl.update_null_renderbuffer_surface(brw, i);
748 }
749 }
750 } else {
751 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
752 }
753 brw->state.dirty.brw |= BRW_NEW_SURFACES;
754 }
755
756 const struct brw_tracked_state brw_renderbuffer_surfaces = {
757 .dirty = {
758 .mesa = (_NEW_COLOR |
759 _NEW_BUFFERS),
760 .brw = BRW_NEW_BATCH,
761 .cache = 0
762 },
763 .emit = brw_update_renderbuffer_surfaces,
764 };
765
766 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
767 .dirty = {
768 .mesa = _NEW_BUFFERS,
769 .brw = BRW_NEW_BATCH,
770 .cache = 0
771 },
772 .emit = brw_update_renderbuffer_surfaces,
773 };
774
775
776 static void
777 update_stage_texture_surfaces(struct brw_context *brw,
778 const struct gl_program *prog,
779 struct brw_stage_state *stage_state,
780 bool for_gather)
781 {
782 if (!prog)
783 return;
784
785 struct gl_context *ctx = &brw->ctx;
786
787 uint32_t *surf_offset = stage_state->surf_offset;
788 if (for_gather)
789 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
790 else
791 surf_offset += stage_state->prog_data->binding_table.texture_start;
792
793 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
794 for (unsigned s = 0; s < num_samplers; s++) {
795 surf_offset[s] = 0;
796
797 if (prog->SamplersUsed & (1 << s)) {
798 const unsigned unit = prog->SamplerUnits[s];
799
800 /* _NEW_TEXTURE */
801 if (ctx->Texture.Unit[unit]._Current) {
802 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
803 }
804 }
805 }
806 }
807
808
809 /**
810 * Construct SURFACE_STATE objects for enabled textures.
811 */
812 static void
813 brw_update_texture_surfaces(struct brw_context *brw)
814 {
815 /* BRW_NEW_VERTEX_PROGRAM */
816 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
817
818 /* BRW_NEW_GEOMETRY_PROGRAM */
819 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
820
821 /* BRW_NEW_FRAGMENT_PROGRAM */
822 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
823
824 /* _NEW_TEXTURE */
825 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
826 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
827 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
828
829    /* Emit an alternate set of surface state for gather.  This
830     * allows the surface format to be overridden for only the
831     * gather4 messages. */
832 if (vs && vs->UsesGather)
833 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
834 if (gs && gs->UsesGather)
835 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
836 if (fs && fs->UsesGather)
837 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
838
839 brw->state.dirty.brw |= BRW_NEW_SURFACES;
840 }
841
842 const struct brw_tracked_state brw_texture_surfaces = {
843 .dirty = {
844 .mesa = _NEW_TEXTURE,
845 .brw = BRW_NEW_BATCH |
846 BRW_NEW_UNIFORM_BUFFER |
847 BRW_NEW_VERTEX_PROGRAM |
848 BRW_NEW_GEOMETRY_PROGRAM |
849 BRW_NEW_FRAGMENT_PROGRAM,
850 .cache = 0
851 },
852 .emit = brw_update_texture_surfaces,
853 };
854
855 void
856 brw_upload_ubo_surfaces(struct brw_context *brw,
857 struct gl_shader *shader,
858 struct brw_stage_state *stage_state,
859 struct brw_stage_prog_data *prog_data)
860 {
861 struct gl_context *ctx = &brw->ctx;
862
863 if (!shader)
864 return;
865
866 uint32_t *surf_offsets =
867 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
868
869 for (int i = 0; i < shader->NumUniformBlocks; i++) {
870 struct gl_uniform_buffer_binding *binding;
871 struct intel_buffer_object *intel_bo;
872
873 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
874 intel_bo = intel_buffer_object(binding->BufferObject);
875 drm_intel_bo *bo =
876 intel_bufferobj_buffer(brw, intel_bo,
877 binding->Offset,
878 binding->BufferObject->Size - binding->Offset);
879
880 /* Because behavior for referencing outside of the binding's size in the
881 * glBindBufferRange case is undefined, we can just bind the whole buffer
882       * that glBindBufferBase wants and still be a correct implementation.
883 */
884 brw_create_constant_surface(brw, bo, binding->Offset,
885 bo->size - binding->Offset,
886 &surf_offsets[i],
887 shader->Stage == MESA_SHADER_FRAGMENT);
888 }
889
890 if (shader->NumUniformBlocks)
891 brw->state.dirty.brw |= BRW_NEW_SURFACES;
892 }
893
894 static void
895 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
896 {
897 struct gl_context *ctx = &brw->ctx;
898 /* _NEW_PROGRAM */
899 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
900
901 if (!prog)
902 return;
903
904 /* CACHE_NEW_WM_PROG */
905 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
906 &brw->wm.base, &brw->wm.prog_data->base);
907 }
908
909 const struct brw_tracked_state brw_wm_ubo_surfaces = {
910 .dirty = {
911 .mesa = _NEW_PROGRAM,
912 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
913 .cache = CACHE_NEW_WM_PROG,
914 },
915 .emit = brw_upload_wm_ubo_surfaces,
916 };
917
918 void
919 brw_upload_abo_surfaces(struct brw_context *brw,
920 struct gl_shader_program *prog,
921 struct brw_stage_state *stage_state,
922 struct brw_stage_prog_data *prog_data)
923 {
924 struct gl_context *ctx = &brw->ctx;
925 uint32_t *surf_offsets =
926 &stage_state->surf_offset[prog_data->binding_table.abo_start];
927
928 for (int i = 0; i < prog->NumAtomicBuffers; i++) {
929 struct gl_atomic_buffer_binding *binding =
930 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
931 struct intel_buffer_object *intel_bo =
932 intel_buffer_object(binding->BufferObject);
933 drm_intel_bo *bo = intel_bufferobj_buffer(
934 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
935
936 brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
937 bo->size - binding->Offset,
938 &surf_offsets[i], true);
939 }
940
941    if (prog->NumAtomicBuffers)
942 brw->state.dirty.brw |= BRW_NEW_SURFACES;
943 }
944
945 static void
946 brw_upload_wm_abo_surfaces(struct brw_context *brw)
947 {
948 struct gl_context *ctx = &brw->ctx;
949 /* _NEW_PROGRAM */
950    struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
951
952 if (prog) {
953 /* CACHE_NEW_WM_PROG */
954 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
955 &brw->wm.prog_data->base);
956 }
957 }
958
959 const struct brw_tracked_state brw_wm_abo_surfaces = {
960 .dirty = {
961 .mesa = _NEW_PROGRAM,
962 .brw = BRW_NEW_BATCH | BRW_NEW_ATOMIC_BUFFER,
963 .cache = CACHE_NEW_WM_PROG,
964 },
965 .emit = brw_upload_wm_abo_surfaces,
966 };
967
968 void
969 gen4_init_vtable_surface_functions(struct brw_context *brw)
970 {
971 brw->vtbl.update_texture_surface = brw_update_texture_surface;
972 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
973 brw->vtbl.update_null_renderbuffer_surface =
974 brw_update_null_renderbuffer_surface;
975 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
976 }