i965: Use brw_stage_state for WM data as well.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169 * force the alpha value read to 1.0. This allows for the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
192
193
194 static void
195 brw_update_buffer_texture_surface(struct gl_context *ctx,
196 unsigned unit,
197 uint32_t *surf_offset)
198 {
199 struct brw_context *brw = brw_context(ctx);
200 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
201 uint32_t *surf;
202 struct intel_buffer_object *intel_obj =
203 intel_buffer_object(tObj->BufferObject);
204 drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
205 gl_format format = tObj->_BufferObjectFormat;
206 uint32_t brw_format = brw_format_for_mesa_format(format);
207 int texel_size = _mesa_get_format_bytes(format);
208
209 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
210 _mesa_problem(NULL, "bad format %s for texture buffer\n",
211 _mesa_get_format_name(format));
212 }
213
214 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
215 6 * 4, 32, surf_offset);
216
217 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
218 (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT));
219
220 if (brw->gen >= 6)
221 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
222
223 if (bo) {
224 surf[1] = bo->offset; /* reloc */
225
226 /* Emit relocation to surface contents. */
227 drm_intel_bo_emit_reloc(brw->batch.bo,
228 *surf_offset + 4,
229 bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
230
231 int w = intel_obj->Base.Size / texel_size;
232 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
233 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
234 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
235 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
236 } else {
237 surf[1] = 0;
238 surf[2] = 0;
239 surf[3] = 0;
240 }
241
242 surf[4] = 0;
243 surf[5] = 0;
244 }
245
246 static void
247 brw_update_texture_surface(struct gl_context *ctx,
248 unsigned unit,
249 uint32_t *surf_offset)
250 {
251 struct brw_context *brw = brw_context(ctx);
252 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
253 struct intel_texture_object *intelObj = intel_texture_object(tObj);
254 struct intel_mipmap_tree *mt = intelObj->mt;
255 struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
256 struct intel_texture_image *intel_image = intel_texture_image(firstImage);
257 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
258 uint32_t *surf;
259
260 if (tObj->Target == GL_TEXTURE_BUFFER) {
261 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
262 return;
263 }
264
265 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
266 6 * 4, 32, surf_offset);
267
268 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
269 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
270 BRW_SURFACE_CUBEFACE_ENABLES |
271 (translate_tex_format(brw,
272 mt->format,
273 tObj->DepthMode,
274 sampler->sRGBDecode) <<
275 BRW_SURFACE_FORMAT_SHIFT));
276
277 surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
278
279 surf[2] = ((intelObj->_MaxLevel - intel_image->mt->first_level) << BRW_SURFACE_LOD_SHIFT |
280 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
281 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
282
283 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
284 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
285 (intelObj->mt->region->pitch - 1) <<
286 BRW_SURFACE_PITCH_SHIFT);
287
288 surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);
289
290 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
291
292 /* Emit relocation to surface contents */
293 drm_intel_bo_emit_reloc(brw->batch.bo,
294 *surf_offset + 4,
295 intelObj->mt->region->bo,
296 surf[1] - intelObj->mt->region->bo->offset,
297 I915_GEM_DOMAIN_SAMPLER, 0);
298 }
299
300 /**
301 * Create the constant buffer surface. Vertex/fragment shader constants will be
302 * read from this buffer with Data Port Read instructions/messages.
303 */
304 static void
305 brw_create_constant_surface(struct brw_context *brw,
306 drm_intel_bo *bo,
307 uint32_t offset,
308 uint32_t size,
309 uint32_t *out_offset,
310 bool dword_pitch)
311 {
312 uint32_t stride = dword_pitch ? 4 : 16;
313 uint32_t elements = ALIGN(size, stride) / stride;
314 const GLint w = elements - 1;
315 uint32_t *surf;
316
317 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
318 6 * 4, 32, out_offset);
319
320 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
321 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
322 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
323
324 if (brw->gen >= 6)
325 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
326
327 surf[1] = bo->offset + offset; /* reloc */
328
329 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
330 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
331
332 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
333 (stride - 1) << BRW_SURFACE_PITCH_SHIFT);
334
335 surf[4] = 0;
336 surf[5] = 0;
337
338 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
339 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
340 * physical cache. It is mapped in hardware to the sampler cache."
341 */
342 drm_intel_bo_emit_reloc(brw->batch.bo,
343 *out_offset + 4,
344 bo, offset,
345 I915_GEM_DOMAIN_SAMPLER, 0);
346 }
347
348 /**
349 * Set up a binding table entry for use by stream output logic (transform
350 * feedback).
351 *
352 * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
353 */
354 void
355 brw_update_sol_surface(struct brw_context *brw,
356 struct gl_buffer_object *buffer_obj,
357 uint32_t *out_offset, unsigned num_vector_components,
358 unsigned stride_dwords, unsigned offset_dwords)
359 {
360 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
361 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
362 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
363 out_offset);
364 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
365 uint32_t offset_bytes = 4 * offset_dwords;
366 size_t size_dwords = buffer_obj->Size / 4;
367 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
368
369 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
370 * too big to map using a single binding table entry?
371 */
372 assert((size_dwords - offset_dwords) / stride_dwords
373 <= BRW_MAX_NUM_BUFFER_ENTRIES);
374
375 if (size_dwords > offset_dwords + num_vector_components) {
376 /* There is room for at least 1 transform feedback output in the buffer.
377 * Compute the number of additional transform feedback outputs the
378 * buffer has room for.
379 */
380 buffer_size_minus_1 =
381 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
382 } else {
383 /* There isn't even room for a single transform feedback output in the
384 * buffer. We can't configure the binding table entry to prevent output
385 * entirely; we'll have to rely on the geometry shader to detect
386 * overflow. But to minimize the damage in case of a bug, set up the
387 * binding table entry to just allow a single output.
388 */
389 buffer_size_minus_1 = 0;
390 }
391 width = buffer_size_minus_1 & 0x7f;
392 height = (buffer_size_minus_1 & 0xfff80) >> 7;
393 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
394
395 switch (num_vector_components) {
396 case 1:
397 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
398 break;
399 case 2:
400 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
401 break;
402 case 3:
403 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
404 break;
405 case 4:
406 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
407 break;
408 default:
409 assert(!"Invalid vector size for transform feedback output");
410 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
411 break;
412 }
413
414 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
415 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
416 surface_format << BRW_SURFACE_FORMAT_SHIFT |
417 BRW_SURFACE_RC_READ_WRITE;
418 surf[1] = bo->offset + offset_bytes; /* reloc */
419 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
420 height << BRW_SURFACE_HEIGHT_SHIFT);
421 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
422 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
423 surf[4] = 0;
424 surf[5] = 0;
425
426 /* Emit relocation to surface contents. */
427 drm_intel_bo_emit_reloc(brw->batch.bo,
428 *out_offset + 4,
429 bo, offset_bytes,
430 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
431 }
432
433 /* Creates a new WM constant buffer reflecting the current fragment program's
434 * constants, if needed by the fragment program.
435 *
436 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
437 * state atom.
438 */
439 static void
440 brw_upload_wm_pull_constants(struct brw_context *brw)
441 {
442 struct gl_context *ctx = &brw->ctx;
443 /* BRW_NEW_FRAGMENT_PROGRAM */
444 struct brw_fragment_program *fp =
445 (struct brw_fragment_program *) brw->fragment_program;
446 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
447 const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
448 const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
449 float *constants;
450 unsigned int i;
451
452 _mesa_load_state_parameters(ctx, params);
453
454 /* CACHE_NEW_WM_PROG */
455 if (brw->wm.prog_data->nr_pull_params == 0) {
456 if (brw->wm.base.const_bo) {
457 drm_intel_bo_unreference(brw->wm.base.const_bo);
458 brw->wm.base.const_bo = NULL;
459 brw->wm.base.surf_offset[surf_index] = 0;
460 brw->state.dirty.brw |= BRW_NEW_SURFACES;
461 }
462 return;
463 }
464
465 drm_intel_bo_unreference(brw->wm.base.const_bo);
466 brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
467 size, 64);
468
469 /* _NEW_PROGRAM_CONSTANTS */
470 drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
471 constants = brw->wm.base.const_bo->virtual;
472 for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
473 constants[i] = *brw->wm.prog_data->pull_param[i];
474 }
475 drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
476
477 brw->vtbl.create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
478 &brw->wm.base.surf_offset[surf_index],
479 true);
480
481 brw->state.dirty.brw |= BRW_NEW_SURFACES;
482 }
483
484 const struct brw_tracked_state brw_wm_pull_constants = {
485 .dirty = {
486 .mesa = (_NEW_PROGRAM_CONSTANTS),
487 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
488 .cache = CACHE_NEW_WM_PROG,
489 },
490 .emit = brw_upload_wm_pull_constants,
491 };
492
493 static void
494 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
495 {
496 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
497 * Notes):
498 *
499 * A null surface will be used in instances where an actual surface is
500 * not bound. When a write message is generated to a null surface, no
501 * actual surface is written to. When a read message (including any
502 * sampling engine message) is generated to a null surface, the result
503 * is all zeros. Note that a null surface type is allowed to be used
504 * with all messages, even if it is not specificially indicated as
505 * supported. All of the remaining fields in surface state are ignored
506 * for null surfaces, with the following exceptions:
507 *
508 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
509 * depth buffer’s corresponding state for all render target surfaces,
510 * including null.
511 *
512 * - Surface Format must be R8G8B8A8_UNORM.
513 */
514 struct gl_context *ctx = &brw->ctx;
515 uint32_t *surf;
516 unsigned surface_type = BRW_SURFACE_NULL;
517 drm_intel_bo *bo = NULL;
518 unsigned pitch_minus_1 = 0;
519 uint32_t multisampling_state = 0;
520
521 /* _NEW_BUFFERS */
522 const struct gl_framebuffer *fb = ctx->DrawBuffer;
523
524 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
525 &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);
526
527 if (fb->Visual.samples > 1) {
528 /* On Gen6, null render targets seem to cause GPU hangs when
529 * multisampling. So work around this problem by rendering into dummy
530 * color buffer.
531 *
532 * To decrease the amount of memory needed by the workaround buffer, we
533 * set its pitch to 128 bytes (the width of a Y tile). This means that
534 * the amount of memory needed for the workaround buffer is
535 * (width_in_tiles + height_in_tiles - 1) tiles.
536 *
537 * Note that since the workaround buffer will be interpreted by the
538 * hardware as an interleaved multisampled buffer, we need to compute
539 * width_in_tiles and height_in_tiles by dividing the width and height
540 * by 16 rather than the normal Y-tile size of 32.
541 */
542 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
543 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
544 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
545 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
546 size_needed);
547 bo = brw->wm.multisampled_null_render_target_bo;
548 surface_type = BRW_SURFACE_2D;
549 pitch_minus_1 = 127;
550 multisampling_state =
551 brw_get_surface_num_multisamples(fb->Visual.samples);
552 }
553
554 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
555 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
556 if (brw->gen < 6) {
557 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
558 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
559 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
560 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
561 }
562 surf[1] = bo ? bo->offset : 0;
563 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
564 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
565
566 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
567 * Notes):
568 *
569 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
570 */
571 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
572 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
573 surf[4] = multisampling_state;
574 surf[5] = 0;
575
576 if (bo) {
577 drm_intel_bo_emit_reloc(brw->batch.bo,
578 brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
579 bo, 0,
580 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
581 }
582 }
583
584 /**
585 * Sets up a surface state structure to point at the given region.
586 * While it is only used for the front/back buffer currently, it should be
587 * usable for further buffers when doing ARB_draw_buffer support.
588 */
589 static void
590 brw_update_renderbuffer_surface(struct brw_context *brw,
591 struct gl_renderbuffer *rb,
592 bool layered,
593 unsigned int unit)
594 {
595 struct gl_context *ctx = &brw->ctx;
596 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
597 struct intel_mipmap_tree *mt = irb->mt;
598 struct intel_region *region;
599 uint32_t *surf;
600 uint32_t tile_x, tile_y;
601 uint32_t format = 0;
602 /* _NEW_BUFFERS */
603 gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
604
605 assert(!layered);
606
607 if (rb->TexImage && !brw->has_surface_tile_offset) {
608 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
609
610 if (tile_x != 0 || tile_y != 0) {
611 /* Original gen4 hardware couldn't draw to a non-tile-aligned
612 * destination in a miptree unless you actually setup your renderbuffer
613 * as a miptree and used the fragile lod/array_index/etc. controls to
614 * select the image. So, instead, we just make a new single-level
615 * miptree and render into that.
616 */
617 intel_renderbuffer_move_to_temp(brw, irb, false);
618 mt = irb->mt;
619 }
620 }
621
622 intel_miptree_used_for_rendering(irb->mt);
623
624 region = irb->mt->region;
625
626 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
627 &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);
628
629 format = brw->render_target_format[rb_format];
630 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
631 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
632 __FUNCTION__, _mesa_get_format_name(rb_format));
633 }
634
635 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
636 format << BRW_SURFACE_FORMAT_SHIFT);
637
638 /* reloc */
639 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
640 region->bo->offset);
641
642 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
643 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
644
645 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
646 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
647
648 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
649
650 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
651 /* Note that the low bits of these fields are missing, so
652 * there's the possibility of getting in trouble.
653 */
654 assert(tile_x % 4 == 0);
655 assert(tile_y % 2 == 0);
656 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
657 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
658 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
659
660 if (brw->gen < 6) {
661 /* _NEW_COLOR */
662 if (!ctx->Color.ColorLogicOpEnabled &&
663 (ctx->Color.BlendEnabled & (1 << unit)))
664 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
665
666 if (!ctx->Color.ColorMask[unit][0])
667 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
668 if (!ctx->Color.ColorMask[unit][1])
669 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
670 if (!ctx->Color.ColorMask[unit][2])
671 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
672
673 /* As mentioned above, disable writes to the alpha component when the
674 * renderbuffer is XRGB.
675 */
676 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
677 !ctx->Color.ColorMask[unit][3]) {
678 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
679 }
680 }
681
682 drm_intel_bo_emit_reloc(brw->batch.bo,
683 brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
684 region->bo,
685 surf[1] - region->bo->offset,
686 I915_GEM_DOMAIN_RENDER,
687 I915_GEM_DOMAIN_RENDER);
688 }
689
690 /**
691 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
692 */
693 static void
694 brw_update_renderbuffer_surfaces(struct brw_context *brw)
695 {
696 struct gl_context *ctx = &brw->ctx;
697 GLuint i;
698
699 /* _NEW_BUFFERS | _NEW_COLOR */
700 /* Update surfaces for drawing buffers */
701 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
702 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
703 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
704 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
705 ctx->DrawBuffer->Layered, i);
706 } else {
707 brw->vtbl.update_null_renderbuffer_surface(brw, i);
708 }
709 }
710 } else {
711 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
712 }
713 brw->state.dirty.brw |= BRW_NEW_SURFACES;
714 }
715
716 const struct brw_tracked_state brw_renderbuffer_surfaces = {
717 .dirty = {
718 .mesa = (_NEW_COLOR |
719 _NEW_BUFFERS),
720 .brw = BRW_NEW_BATCH,
721 .cache = 0
722 },
723 .emit = brw_update_renderbuffer_surfaces,
724 };
725
726 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
727 .dirty = {
728 .mesa = _NEW_BUFFERS,
729 .brw = BRW_NEW_BATCH,
730 .cache = 0
731 },
732 .emit = brw_update_renderbuffer_surfaces,
733 };
734
735
736 static void
737 update_stage_texture_surfaces(struct brw_context *brw,
738 const struct gl_program *prog,
739 uint32_t *surf_offset)
740 {
741 if (!prog)
742 return;
743
744 struct gl_context *ctx = &brw->ctx;
745
746 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
747
748 for (unsigned s = 0; s < num_samplers; s++) {
749 surf_offset[s] = 0;
750
751 if (prog->SamplersUsed & (1 << s)) {
752 const unsigned unit = prog->SamplerUnits[s];
753
754 /* _NEW_TEXTURE */
755 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
756 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s);
757 }
758 }
759 }
760 }
761
762
763 /**
764 * Construct SURFACE_STATE objects for enabled textures.
765 */
766 static void
767 brw_update_texture_surfaces(struct brw_context *brw)
768 {
769 /* BRW_NEW_VERTEX_PROGRAM */
770 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
771
772 /* BRW_NEW_GEOMETRY_PROGRAM */
773 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
774
775 /* BRW_NEW_FRAGMENT_PROGRAM */
776 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
777
778 /* _NEW_TEXTURE */
779 update_stage_texture_surfaces(brw, vs,
780 brw->vs.base.surf_offset +
781 SURF_INDEX_VEC4_TEXTURE(0));
782 update_stage_texture_surfaces(brw, gs,
783 brw->gs.base.surf_offset +
784 SURF_INDEX_VEC4_TEXTURE(0));
785 update_stage_texture_surfaces(brw, fs,
786 brw->wm.base.surf_offset +
787 SURF_INDEX_TEXTURE(0));
788
789 brw->state.dirty.brw |= BRW_NEW_SURFACES;
790 }
791
792 const struct brw_tracked_state brw_texture_surfaces = {
793 .dirty = {
794 .mesa = _NEW_TEXTURE,
795 .brw = BRW_NEW_BATCH |
796 BRW_NEW_VERTEX_PROGRAM |
797 BRW_NEW_GEOMETRY_PROGRAM |
798 BRW_NEW_FRAGMENT_PROGRAM,
799 .cache = 0
800 },
801 .emit = brw_update_texture_surfaces,
802 };
803
804 void
805 brw_upload_ubo_surfaces(struct brw_context *brw,
806 struct gl_shader *shader,
807 uint32_t *surf_offsets)
808 {
809 struct gl_context *ctx = &brw->ctx;
810
811 if (!shader)
812 return;
813
814 for (int i = 0; i < shader->NumUniformBlocks; i++) {
815 struct gl_uniform_buffer_binding *binding;
816 struct intel_buffer_object *intel_bo;
817
818 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
819 intel_bo = intel_buffer_object(binding->BufferObject);
820 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
821
822 /* Because behavior for referencing outside of the binding's size in the
823 * glBindBufferRange case is undefined, we can just bind the whole buffer
824 * glBindBufferBase wants and be a correct implementation.
825 */
826 brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
827 bo->size - binding->Offset,
828 &surf_offsets[i],
829 shader->Type == GL_FRAGMENT_SHADER);
830 }
831
832 if (shader->NumUniformBlocks)
833 brw->state.dirty.brw |= BRW_NEW_SURFACES;
834 }
835
836 static void
837 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
838 {
839 struct gl_context *ctx = &brw->ctx;
840 /* _NEW_PROGRAM */
841 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
842
843 if (!prog)
844 return;
845
846 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
847 &brw->wm.base.surf_offset[SURF_INDEX_WM_UBO(0)]);
848 }
849
850 const struct brw_tracked_state brw_wm_ubo_surfaces = {
851 .dirty = {
852 .mesa = _NEW_PROGRAM,
853 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
854 .cache = 0,
855 },
856 .emit = brw_upload_wm_ubo_surfaces,
857 };
858
859 /**
860 * Constructs the binding table for the WM surface state, which maps unit
861 * numbers to surface state objects.
862 */
863 static void
864 brw_upload_wm_binding_table(struct brw_context *brw)
865 {
866 uint32_t *bind;
867 int i;
868
869 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
870 gen7_create_shader_time_surface(brw, &brw->wm.base.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
871 }
872
873 /* CACHE_NEW_WM_PROG */
874 unsigned entries = brw->wm.prog_data->binding_table_size;
875 bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
876 sizeof(uint32_t) * entries,
877 32, &brw->wm.base.bind_bo_offset);
878
879 /* BRW_NEW_SURFACES */
880 for (i = 0; i < entries; i++) {
881 bind[i] = brw->wm.base.surf_offset[i];
882 }
883
884 brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
885 }
886
887 const struct brw_tracked_state brw_wm_binding_table = {
888 .dirty = {
889 .mesa = 0,
890 .brw = (BRW_NEW_BATCH |
891 BRW_NEW_SURFACES),
892 .cache = CACHE_NEW_WM_PROG
893 },
894 .emit = brw_upload_wm_binding_table,
895 };
896
897 void
898 gen4_init_vtable_surface_functions(struct brw_context *brw)
899 {
900 brw->vtbl.update_texture_surface = brw_update_texture_surface;
901 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
902 brw->vtbl.update_null_renderbuffer_surface =
903 brw_update_null_renderbuffer_surface;
904 brw->vtbl.create_constant_surface = brw_create_constant_surface;
905 }