intel: Add a batch flush between front-buffer downsample and X protocol.
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keith@tungstengraphics.com>
  */


#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      assert(0);
      return 0;
   }
}

uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}


uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}
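
/* Interpretive note on the helper above: it only distinguishes single-sampled
 * from multisampled surfaces, and any num_samples > 1 is encoded as the 4x
 * setting.  That matches the assumption that 4x is the only multisample count
 * this legacy (gen4-6) SURFACE_STATE path needs to express; later generations
 * use a different surface-state layout elsewhere in the driver.
 */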


/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
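
/* A couple of worked examples of the function above (illustrative only):
 * a GL_ALPHA texture with the default swizzle state ends up as
 * (ZERO, ZERO, ZERO, X), i.e. alpha is read from the red channel of whatever
 * RGBA format backs the texture; a depth texture with DEPTH_TEXTURE_MODE
 * GL_LUMINANCE ends up as (X, X, X, ONE).
 */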


static void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *binding_table,
                                  unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   uint32_t *surf;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
   gl_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &binding_table[surf_index]);

   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
              (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT));

   if (brw->gen >= 6)
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;

   if (bo) {
      surf[1] = bo->offset; /* reloc */

      /* Emit relocation to surface contents. */
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              binding_table[surf_index] + 4,
                              bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);

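      /* The texel count minus one is split across the surface's Width,
       * Height, and Depth fields below: the low 7 bits go to Width, the
       * next 13 to Height, and the next 7 to Depth, for a 27-bit element
       * count overall.  Illustrative example: a 1000000-texel buffer gives
       * w = 999999 = 0xf423f, so Width = 0x3f, Height = 0x1e84, Depth = 0.
       */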
      int w = intel_obj->Base.Size / texel_size;
      surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
                 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
      surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
                 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
   } else {
      surf[1] = 0;
      surf[2] = 0;
      surf[3] = 0;
   }

   surf[4] = 0;
   surf[5] = 0;
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *binding_table,
                           unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;
   uint32_t tile_x, tile_y;

   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &binding_table[surf_index]);

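   /* Rough sketch of the six-dword gen4-6 SURFACE_STATE filled in below (as
    * written by this function): dword 0 holds the surface type, format and
    * miptree layout; dword 1 is the surface base address (a relocation);
    * dword 2 packs the LOD count, width and height; dword 3 packs tiling,
    * depth and pitch; dword 4 holds the multisample count; and dword 5
    * carries the intra-tile x/y offsets and the vertical alignment bit.
    */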
   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              (translate_tex_format(brw,
                                    mt->format,
                                    tObj->DepthMode,
                                    sampler->sRGBDecode) <<
               BRW_SURFACE_FORMAT_SHIFT));

   surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
   surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
                                             &tile_x, &tile_y);

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (intelObj->mt->region->pitch - 1) <<
              BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           binding_table[surf_index] + 4,
                           intelObj->mt->region->bo,
                           surf[1] - intelObj->mt->region->bo->offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface. Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
static void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
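   /* Interpretive note (the authoritative definition is how the callers in
    * this file use it): dword_pitch selects the element size the shader will
    * read through this surface -- a 4-byte stride when the consumer fetches
    * individual dwords (the FS pull-constant path below passes true), or a
    * 16-byte stride when whole vec4s are fetched at a time.
    */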
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;
   const GLint w = elements - 1;
   uint32_t *surf;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, out_offset);

   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);

   if (brw->gen >= 6)
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;

   surf[1] = bo->offset + offset; /* reloc */

   surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
              ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
              (stride - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache. It is mapped in hardware to the sampler cache."
    */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   uint32_t offset_bytes = 4 * offset_dwords;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer. We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow. But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      assert(!"Invalid vector size for transform feedback output");
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
      BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
      surface_format << BRW_SURFACE_FORMAT_SHIFT |
      BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
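
/* Illustrative example of the encoding above (not from the PRM): a transform
 * feedback output writing a vec4 with a 4-dword stride gets surface_format
 * R32G32B32A32_FLOAT and pitch_minus_1 = 4*4 - 1 = 15, and the remaining
 * room in the buffer, measured in whole outputs, is packed into the
 * Width/Height/Depth fields just like the buffer surfaces above.
 */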

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   struct gl_program_parameter_list *params = fp->program.Base.Parameters;
   const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
   const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
   float *constants;
   unsigned int i;

   _mesa_load_state_parameters(ctx, params);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->nr_pull_params == 0) {
      if (brw->wm.const_bo) {
         drm_intel_bo_unreference(brw->wm.const_bo);
         brw->wm.const_bo = NULL;
         brw->wm.surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= BRW_NEW_SURFACES;
      }
      return;
   }

   drm_intel_bo_unreference(brw->wm.const_bo);
   brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
                                         size, 64);

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
   constants = brw->wm.const_bo->virtual;
   for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
      constants[i] = *brw->wm.prog_data->pull_param[i];
   }
   drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);

   brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
                                     &brw->wm.surf_offset[surf_index],
                                     true);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};
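
/* General note on the state-atom convention used by this and the other
 * brw_tracked_state structs in this file (not something specific to pull
 * constants): the .dirty bits describe when .emit has to be re-run, i.e.
 * whenever any of the listed _NEW_* (core Mesa), BRW_NEW_* (driver) or
 * CACHE_NEW_* (program cache) flags are raised, the state upload loop calls
 * the .emit function again.
 */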

static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer’s corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.surf_offset[SURF_INDEX_DRAW(unit)]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling. So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile). This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
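      /* Illustrative sizing example: for a 1920x1080 multisampled
       * framebuffer this gives width_in_tiles = 120 and height_in_tiles = 68,
       * so size_needed = (120 + 68 - 1) * 4096 = 765952 bytes, i.e. roughly
       * 750 KB instead of a full-size dummy color buffer.
       */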
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   struct intel_region *region;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually setup your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image. So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   region = irb->mt->region;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.surf_offset[SURF_INDEX_DRAW(unit)]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              region->bo->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
              (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

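   /* The block below is gen4/5-only: on those parts there is no separate
    * BLEND_STATE structure, so blend enable and the per-channel color write
    * mask are carried in SURFACE_STATE dword 0 instead (gen6+ presumably
    * programs these through its own blend state elsewhere, which is why this
    * is guarded by brw->gen < 6).
    */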
   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is XRGB
       * (it has no alpha bits) or when alpha writes are masked off.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
                           region->bo,
                           surf[1] - region->bo->offset,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
                                                  ctx->DrawBuffer->Layered, i);
         } else {
            brw->vtbl.update_null_renderbuffer_surface(brw, i);
         }
      }
   } else {
      brw->vtbl.update_null_renderbuffer_surface(brw, 0);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
    * Unfortunately, we're stuck using the gl_program structs until the
    * ARB_fragment_program front-end gets converted to GLSL IR. These
    * have the downside that SamplerUnits is split and only contains the
    * mappings for samplers active in that stage.
    */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

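   /* SamplersUsed is a bitmask of sampler indices referenced by each program,
    * so _mesa_fls() (find last set) of the union gives the highest used
    * sampler index plus one, which is all the loop below needs to cover both
    * stages.
    */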
   unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);

   for (unsigned s = 0; s < num_samplers; s++) {
      brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
      brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;

      if (vs->SamplersUsed & (1 << s)) {
         const unsigned unit = vs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->vs.surf_offset,
                                             SURF_INDEX_VS_TEXTURE(s));
         }
      }

      if (fs->SamplersUsed & (1 << s)) {
         const unsigned unit = fs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->wm.surf_offset,
                                             SURF_INDEX_TEXTURE(s));
         }
      }
   }

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};

void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        uint32_t *surf_offsets)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);

      /* Because behavior for referencing outside of the binding's size in the
       * glBindBufferRange case is undefined, we can just bind the whole buffer
       * (which is what glBindBufferBase wants anyway) and still be a correct
       * implementation.
       */
      brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
                                        bo->size - binding->Offset,
                                        &surf_offsets[i],
                                        shader->Type == GL_FRAGMENT_SHADER);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
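
/* Note on indexing (assuming SURF_INDEX_WM_UBO() indices are contiguous):
 * surf_offsets above is indexed by the shader's uniform block index, so with
 * the WM caller below passing &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)],
 * block i ends up at binding table slot SURF_INDEX_WM_UBO(i).
 */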

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (!prog)
      return;

   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = 0,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

/**
 * Constructs the binding table for the WM surface state, which maps unit
 * numbers to surface state objects.
 */
static void
brw_upload_wm_binding_table(struct brw_context *brw)
{
   uint32_t *bind;
   int i;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
   }

   /* CACHE_NEW_WM_PROG */
   unsigned entries = brw->wm.prog_data->binding_table_size;
   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
                          sizeof(uint32_t) * entries,
                          32, &brw->wm.bind_bo_offset);

   /* BRW_NEW_SURFACES */
   for (i = 0; i < entries; i++) {
      bind[i] = brw->wm.surf_offset[i];
   }

   brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
}
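
/* Sketch of what the table built above contains: each entry is just the byte
 * offset of a SURFACE_STATE that brw_state_batch() placed in the batch (an
 * offset relative to the surface state base address, which this driver points
 * at the batch buffer); hardware messages then reference surfaces by their
 * index into this table.
 */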

const struct brw_tracked_state brw_wm_binding_table = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_SURFACES),
      .cache = CACHE_NEW_WM_PROG
   },
   .emit = brw_upload_wm_binding_table,
};

void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.update_null_renderbuffer_surface =
      brw_update_null_renderbuffer_surface;
   brw->vtbl.create_constant_surface = brw_create_constant_surface;
}