i965: Define vtbl method that initializes an untyped R/W surface.
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */

#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      assert(0);
      return 0;
   }
}

uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

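/* None of the hardware programmed by this file supports more than 4x MSAA:
 * Gen4-5 have no multisampling at all, and Gen6 supports only 4x. Any
 * multisampled surface is therefore described as MULTISAMPLECOUNT_4.
 */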
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}


/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to 0.0.
    * Similarly, if the texture's format has no alpha channel, force the
    * alpha value read to 1.0. This allows the implementation to use an
    * RGBA texture for any of these formats without leaking any unexpected
    * values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

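   /* Compose the depth-mode/base-format swizzle computed above with the
    * texture's own swizzle state. For example, sampling a GL_DEPTH_COMPONENT
    * texture with DepthMode GL_LUMINANCE and the default XYZW swizzle
    * resolves each component through swizzles[], yielding (X, X, X, ONE).
    */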
   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

static void
gen4_emit_buffer_surface_state(struct brw_context *brw,
                               uint32_t *out_offset,
                               drm_intel_bo *bo,
                               unsigned buffer_offset,
                               unsigned surface_format,
                               unsigned buffer_size,
                               unsigned pitch)
{
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    6 * 4, 32, out_offset);
   memset(surf, 0, 6 * 4);

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
   surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
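   /* The element count is split across the surface's width, height, and
    * depth fields: width holds bits 6:0, height bits 19:7, and depth
    * bits 26:20. For example, buffer_size = 0x123456 packs as
    * width = 0x56, height = 0x468, depth = 0x1.
    */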
   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;

   /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache. It is mapped in hardware to the sampler cache."
    */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
                              bo, buffer_offset,
                              I915_GEM_DOMAIN_SAMPLER, 0);
   }
}

static void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   drm_intel_bo *bo = NULL;
   gl_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
   }

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   gen4_emit_buffer_surface_state(brw, surf_offset, bo,
                                  tObj->BufferOffset,
                                  brw_format,
                                  size / texel_size,
                                  texel_size);
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;

   /* BRW_NEW_UNIFORM_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, surf_offset);

   (void) for_gather;   /* no w/a to apply for this gen */

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              (translate_tex_format(brw,
                                    mt->format,
                                    tObj->DepthMode,
                                    sampler->sRGBDecode) <<
               BRW_SURFACE_FORMAT_SHIFT));

   surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (intelObj->mt->region->pitch - 1) <<
              BRW_SURFACE_PITCH_SHIFT);

   surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
              SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));

   surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4,
                           intelObj->mt->region->bo,
                           surf[1] - intelObj->mt->region->bo->offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface. Vertex/fragment shader constants will
 * be read from this buffer with Data Port Read instructions/messages.
 */
static void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;

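   /* The constants are exposed as a buffer of R32G32B32A32_FLOAT elements.
    * A four-byte stride is used when the shader fetches individual floats
    * (the dword_pitch case); otherwise constants are fetched as 16-byte
    * vec4s.
    */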
   gen4_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                  BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
                                  elements, stride);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                             offset_bytes,
                                             buffer_obj->Size - offset_bytes);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the
       * buffer. Compute the number of additional transform feedback outputs
       * the buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer. We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow. But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
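   /* Worked example: a 1024-dword buffer with offset_dwords == 0,
    * num_vector_components == 4, and stride_dwords == 4 gives
    * buffer_size_minus_1 = (1024 - 0 - 4) / 4 = 255, i.e. entries 0..255
    * are addressable, exactly the 256 four-dword outputs that fit.
    */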
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      assert(!"Invalid vector size for transform feedback output");
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   struct gl_program_parameter_list *params = fp->program.Base.Parameters;
   const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
   const int surf_index =
      brw->wm.prog_data->base.binding_table.pull_constants_start;
   float *constants;
   unsigned int i;

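   /* Refresh the values of any state-derived parameters (STATE_* entries in
    * the parameter list) so that the copy into the constant buffer below
    * snapshots the current GL state.
    */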
   _mesa_load_state_parameters(ctx, params);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->nr_pull_params == 0) {
      if (brw->wm.base.const_bo) {
         drm_intel_bo_unreference(brw->wm.base.const_bo);
         brw->wm.base.const_bo = NULL;
         brw->wm.base.surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= BRW_NEW_SURFACES;
      }
      return;
   }

   drm_intel_bo_unreference(brw->wm.base.const_bo);
   brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
                                              size, 64);

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
   constants = brw->wm.base.const_bo->virtual;
   for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
      constants[i] = *brw->wm.prog_data->pull_param[i];
   }
   drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);

   brw->vtbl.create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
                                     &brw->wm.base.surf_offset[surf_index],
                                     true);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};

static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    * A null surface will be used in instances where an actual surface is
    * not bound. When a write message is generated to a null surface, no
    * actual surface is written to. When a read message (including any
    * sampling engine message) is generated to a null surface, the result
    * is all zeros. Note that a null surface type is allowed to be used
    * with all messages, even if it is not specifically indicated as
    * supported. All of the remaining fields in surface state are ignored
    * for null surfaces, with the following exceptions:
    *
    * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *   depth buffer's corresponding state for all render target surfaces,
    *   including null.
    *
    * - Surface Format must be R8G8B8A8_UNORM.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t surf_index =
      brw->wm.prog_data->binding_table.render_target_start + unit;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[surf_index]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling. So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer,
       * we set its pitch to 128 bytes (the width of a Y tile). This means
       * that the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
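      /* Worked example: a 1024x768 multisampled framebuffer gives
       * width_in_tiles = 64 and height_in_tiles = 48, so the workaround
       * buffer only needs (64 + 48 - 1) * 4096 bytes (~444 kB) rather than
       * the ~12 MB a fully-sized interleaved 4x color buffer would take.
       */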
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    * If Surface Type is SURFTYPE_NULL, this field must be TRUE.
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.base.surf_offset[surf_index] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   struct intel_region *region;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   uint32_t surf_index =
      brw->wm.prog_data->binding_table.render_target_start + unit;

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image. So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   region = irb->mt->region;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[surf_index]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              region->bo->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
              (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so there's the
    * possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.base.surf_offset[surf_index] + 4,
                           region->bo,
                           surf[1] - region->bo->offset,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(
               brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
               ctx->DrawBuffer->Layered, i);
         } else {
            brw->vtbl.update_null_renderbuffer_surface(brw, i);
         }
      }
   } else {
      brw->vtbl.update_null_renderbuffer_surface(brw, 0);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};


static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.texture_start;
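   /* A stage's binding table reserves separate ranges for regular texture
    * surfaces and for gather4 surfaces, so a texture used with
    * textureGather gets a second surface state; this is what lets the
    * surface format be overridden for gather4 messages only.
    */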

   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s,
                                             for_gather);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);

   /* Emit an alternate set of surface state for gather. This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (vs && vs->UsesGather)
      update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
   if (gs && gs->UsesGather)
      update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
   if (fs && fs->UsesGather)
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_UNIFORM_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};

void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, intel_bo,
                                binding->Offset,
                                binding->BufferObject->Size - binding->Offset);

      /* Because behavior for referencing outside of the binding's size in
       * the glBindBufferRange case is undefined, we can just bind the whole
       * buffer that glBindBufferBase would and still be a correct
       * implementation.
       */
      brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
                                        bo->size - binding->Offset,
                                        &surf_offsets[i],
                                        shader->Type == GL_FRAGMENT_SHADER);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (!prog)
      return;

   /* CACHE_NEW_WM_PROG */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

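/* Install the Gen4-6 surface state implementations in the context's vtbl.
 * Later generations install their own implementations of these hooks from
 * their respective surface state files.
 */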
void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.update_null_renderbuffer_surface =
      brw_update_null_renderbuffer_surface;
   brw->vtbl.create_constant_surface = brw_create_constant_surface;
}