4c3eb69716722610273a0d81018d0da2b6c353fe
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169 * force the alpha value read to 1.0. This allows for the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
192
193 static void
194 gen4_emit_buffer_surface_state(struct brw_context *brw,
195 uint32_t *out_offset,
196 drm_intel_bo *bo,
197 unsigned buffer_offset,
198 unsigned surface_format,
199 unsigned buffer_size,
200 unsigned pitch)
201 {
202 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
203 6 * 4, 32, out_offset);
204 memset(surf, 0, 6 * 4);
205
206 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
207 surface_format << BRW_SURFACE_FORMAT_SHIFT |
208 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
209 surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
210 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
211 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
212 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
213 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
214
215 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
216 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
217 * physical cache. It is mapped in hardware to the sampler cache."
218 */
219 if (bo) {
220 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
221 bo, buffer_offset,
222 I915_GEM_DOMAIN_SAMPLER, 0);
223 }
224 }
225
226 static void
227 brw_update_buffer_texture_surface(struct gl_context *ctx,
228 unsigned unit,
229 uint32_t *surf_offset)
230 {
231 struct brw_context *brw = brw_context(ctx);
232 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
233 struct intel_buffer_object *intel_obj =
234 intel_buffer_object(tObj->BufferObject);
235 drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
236 gl_format format = tObj->_BufferObjectFormat;
237 uint32_t brw_format = brw_format_for_mesa_format(format);
238 int texel_size = _mesa_get_format_bytes(format);
239 int w = intel_obj ? intel_obj->Base.Size / texel_size : 0;
240
241 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
242 _mesa_problem(NULL, "bad format %s for texture buffer\n",
243 _mesa_get_format_name(format));
244 }
245
246 gen4_emit_buffer_surface_state(brw, surf_offset, bo, 0,
247 brw_format,
248 w, texel_size);
249 }
250
251 static void
252 brw_update_texture_surface(struct gl_context *ctx,
253 unsigned unit,
254 uint32_t *surf_offset)
255 {
256 struct brw_context *brw = brw_context(ctx);
257 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
258 struct intel_texture_object *intelObj = intel_texture_object(tObj);
259 struct intel_mipmap_tree *mt = intelObj->mt;
260 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
261 uint32_t *surf;
262
263 if (tObj->Target == GL_TEXTURE_BUFFER) {
264 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
265 return;
266 }
267
268 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
269 6 * 4, 32, surf_offset);
270
271 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
272 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
273 BRW_SURFACE_CUBEFACE_ENABLES |
274 (translate_tex_format(brw,
275 mt->format,
276 tObj->DepthMode,
277 sampler->sRGBDecode) <<
278 BRW_SURFACE_FORMAT_SHIFT));
279
280 surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
281
282 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
283 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
284 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
285
286 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
287 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
288 (intelObj->mt->region->pitch - 1) <<
289 BRW_SURFACE_PITCH_SHIFT);
290
291 surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
292 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
293
294 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
295
296 /* Emit relocation to surface contents */
297 drm_intel_bo_emit_reloc(brw->batch.bo,
298 *surf_offset + 4,
299 intelObj->mt->region->bo,
300 surf[1] - intelObj->mt->region->bo->offset,
301 I915_GEM_DOMAIN_SAMPLER, 0);
302 }
303
304 /**
305 * Create the constant buffer surface. Vertex/fragment shader constants will be
306 * read from this buffer with Data Port Read instructions/messages.
307 */
308 static void
309 brw_create_constant_surface(struct brw_context *brw,
310 drm_intel_bo *bo,
311 uint32_t offset,
312 uint32_t size,
313 uint32_t *out_offset,
314 bool dword_pitch)
315 {
316 uint32_t stride = dword_pitch ? 4 : 16;
317 uint32_t elements = ALIGN(size, stride) / stride;
318
319 gen4_emit_buffer_surface_state(brw, out_offset, bo, offset,
320 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
321 elements, stride);
322 }
323
324 /**
325 * Set up a binding table entry for use by stream output logic (transform
326 * feedback).
327 *
328 * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
329 */
330 void
331 brw_update_sol_surface(struct brw_context *brw,
332 struct gl_buffer_object *buffer_obj,
333 uint32_t *out_offset, unsigned num_vector_components,
334 unsigned stride_dwords, unsigned offset_dwords)
335 {
336 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
337 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
338 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
339 out_offset);
340 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
341 uint32_t offset_bytes = 4 * offset_dwords;
342 size_t size_dwords = buffer_obj->Size / 4;
343 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
344
345 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
346 * too big to map using a single binding table entry?
347 */
348 assert((size_dwords - offset_dwords) / stride_dwords
349 <= BRW_MAX_NUM_BUFFER_ENTRIES);
350
351 if (size_dwords > offset_dwords + num_vector_components) {
352 /* There is room for at least 1 transform feedback output in the buffer.
353 * Compute the number of additional transform feedback outputs the
354 * buffer has room for.
355 */
356 buffer_size_minus_1 =
357 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
358 } else {
359 /* There isn't even room for a single transform feedback output in the
360 * buffer. We can't configure the binding table entry to prevent output
361 * entirely; we'll have to rely on the geometry shader to detect
362 * overflow. But to minimize the damage in case of a bug, set up the
363 * binding table entry to just allow a single output.
364 */
365 buffer_size_minus_1 = 0;
366 }
367 width = buffer_size_minus_1 & 0x7f;
368 height = (buffer_size_minus_1 & 0xfff80) >> 7;
369 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
370
371 switch (num_vector_components) {
372 case 1:
373 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
374 break;
375 case 2:
376 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
377 break;
378 case 3:
379 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
380 break;
381 case 4:
382 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
383 break;
384 default:
385 assert(!"Invalid vector size for transform feedback output");
386 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
387 break;
388 }
389
390 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
391 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
392 surface_format << BRW_SURFACE_FORMAT_SHIFT |
393 BRW_SURFACE_RC_READ_WRITE;
394 surf[1] = bo->offset + offset_bytes; /* reloc */
395 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
396 height << BRW_SURFACE_HEIGHT_SHIFT);
397 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
398 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
399 surf[4] = 0;
400 surf[5] = 0;
401
402 /* Emit relocation to surface contents. */
403 drm_intel_bo_emit_reloc(brw->batch.bo,
404 *out_offset + 4,
405 bo, offset_bytes,
406 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
407 }
408
409 /* Creates a new WM constant buffer reflecting the current fragment program's
410 * constants, if needed by the fragment program.
411 *
412 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
413 * state atom.
414 */
415 static void
416 brw_upload_wm_pull_constants(struct brw_context *brw)
417 {
418 struct gl_context *ctx = &brw->ctx;
419 /* BRW_NEW_FRAGMENT_PROGRAM */
420 struct brw_fragment_program *fp =
421 (struct brw_fragment_program *) brw->fragment_program;
422 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
423 const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
424 const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
425 float *constants;
426 unsigned int i;
427
428 _mesa_load_state_parameters(ctx, params);
429
430 /* CACHE_NEW_WM_PROG */
431 if (brw->wm.prog_data->nr_pull_params == 0) {
432 if (brw->wm.base.const_bo) {
433 drm_intel_bo_unreference(brw->wm.base.const_bo);
434 brw->wm.base.const_bo = NULL;
435 brw->wm.base.surf_offset[surf_index] = 0;
436 brw->state.dirty.brw |= BRW_NEW_SURFACES;
437 }
438 return;
439 }
440
441 drm_intel_bo_unreference(brw->wm.base.const_bo);
442 brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
443 size, 64);
444
445 /* _NEW_PROGRAM_CONSTANTS */
446 drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
447 constants = brw->wm.base.const_bo->virtual;
448 for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
449 constants[i] = *brw->wm.prog_data->pull_param[i];
450 }
451 drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
452
453 brw->vtbl.create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
454 &brw->wm.base.surf_offset[surf_index],
455 true);
456
457 brw->state.dirty.brw |= BRW_NEW_SURFACES;
458 }
459
460 const struct brw_tracked_state brw_wm_pull_constants = {
461 .dirty = {
462 .mesa = (_NEW_PROGRAM_CONSTANTS),
463 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
464 .cache = CACHE_NEW_WM_PROG,
465 },
466 .emit = brw_upload_wm_pull_constants,
467 };
468
469 static void
470 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
471 {
472 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
473 * Notes):
474 *
475 * A null surface will be used in instances where an actual surface is
476 * not bound. When a write message is generated to a null surface, no
477 * actual surface is written to. When a read message (including any
478 * sampling engine message) is generated to a null surface, the result
479 * is all zeros. Note that a null surface type is allowed to be used
480 * with all messages, even if it is not specificially indicated as
481 * supported. All of the remaining fields in surface state are ignored
482 * for null surfaces, with the following exceptions:
483 *
484 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
485 * depth buffer’s corresponding state for all render target surfaces,
486 * including null.
487 *
488 * - Surface Format must be R8G8B8A8_UNORM.
489 */
490 struct gl_context *ctx = &brw->ctx;
491 uint32_t *surf;
492 unsigned surface_type = BRW_SURFACE_NULL;
493 drm_intel_bo *bo = NULL;
494 unsigned pitch_minus_1 = 0;
495 uint32_t multisampling_state = 0;
496
497 /* _NEW_BUFFERS */
498 const struct gl_framebuffer *fb = ctx->DrawBuffer;
499
500 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
501 &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);
502
503 if (fb->Visual.samples > 1) {
504 /* On Gen6, null render targets seem to cause GPU hangs when
505 * multisampling. So work around this problem by rendering into dummy
506 * color buffer.
507 *
508 * To decrease the amount of memory needed by the workaround buffer, we
509 * set its pitch to 128 bytes (the width of a Y tile). This means that
510 * the amount of memory needed for the workaround buffer is
511 * (width_in_tiles + height_in_tiles - 1) tiles.
512 *
513 * Note that since the workaround buffer will be interpreted by the
514 * hardware as an interleaved multisampled buffer, we need to compute
515 * width_in_tiles and height_in_tiles by dividing the width and height
516 * by 16 rather than the normal Y-tile size of 32.
517 */
518 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
519 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
520 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
521 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
522 size_needed);
523 bo = brw->wm.multisampled_null_render_target_bo;
524 surface_type = BRW_SURFACE_2D;
525 pitch_minus_1 = 127;
526 multisampling_state =
527 brw_get_surface_num_multisamples(fb->Visual.samples);
528 }
529
530 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
531 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
532 if (brw->gen < 6) {
533 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
534 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
535 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
536 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
537 }
538 surf[1] = bo ? bo->offset : 0;
539 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
540 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
541
542 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
543 * Notes):
544 *
545 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
546 */
547 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
548 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
549 surf[4] = multisampling_state;
550 surf[5] = 0;
551
552 if (bo) {
553 drm_intel_bo_emit_reloc(brw->batch.bo,
554 brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
555 bo, 0,
556 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
557 }
558 }
559
560 /**
561 * Sets up a surface state structure to point at the given region.
562 * While it is only used for the front/back buffer currently, it should be
563 * usable for further buffers when doing ARB_draw_buffer support.
564 */
565 static void
566 brw_update_renderbuffer_surface(struct brw_context *brw,
567 struct gl_renderbuffer *rb,
568 bool layered,
569 unsigned int unit)
570 {
571 struct gl_context *ctx = &brw->ctx;
572 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
573 struct intel_mipmap_tree *mt = irb->mt;
574 struct intel_region *region;
575 uint32_t *surf;
576 uint32_t tile_x, tile_y;
577 uint32_t format = 0;
578 /* _NEW_BUFFERS */
579 gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
580
581 assert(!layered);
582
583 if (rb->TexImage && !brw->has_surface_tile_offset) {
584 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
585
586 if (tile_x != 0 || tile_y != 0) {
587 /* Original gen4 hardware couldn't draw to a non-tile-aligned
588 * destination in a miptree unless you actually setup your renderbuffer
589 * as a miptree and used the fragile lod/array_index/etc. controls to
590 * select the image. So, instead, we just make a new single-level
591 * miptree and render into that.
592 */
593 intel_renderbuffer_move_to_temp(brw, irb, false);
594 mt = irb->mt;
595 }
596 }
597
598 intel_miptree_used_for_rendering(irb->mt);
599
600 region = irb->mt->region;
601
602 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
603 &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);
604
605 format = brw->render_target_format[rb_format];
606 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
607 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
608 __FUNCTION__, _mesa_get_format_name(rb_format));
609 }
610
611 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
612 format << BRW_SURFACE_FORMAT_SHIFT);
613
614 /* reloc */
615 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
616 region->bo->offset);
617
618 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
619 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
620
621 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
622 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
623
624 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
625
626 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
627 /* Note that the low bits of these fields are missing, so
628 * there's the possibility of getting in trouble.
629 */
630 assert(tile_x % 4 == 0);
631 assert(tile_y % 2 == 0);
632 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
633 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
634 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
635
636 if (brw->gen < 6) {
637 /* _NEW_COLOR */
638 if (!ctx->Color.ColorLogicOpEnabled &&
639 (ctx->Color.BlendEnabled & (1 << unit)))
640 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
641
642 if (!ctx->Color.ColorMask[unit][0])
643 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
644 if (!ctx->Color.ColorMask[unit][1])
645 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
646 if (!ctx->Color.ColorMask[unit][2])
647 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
648
649 /* As mentioned above, disable writes to the alpha component when the
650 * renderbuffer is XRGB.
651 */
652 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
653 !ctx->Color.ColorMask[unit][3]) {
654 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
655 }
656 }
657
658 drm_intel_bo_emit_reloc(brw->batch.bo,
659 brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
660 region->bo,
661 surf[1] - region->bo->offset,
662 I915_GEM_DOMAIN_RENDER,
663 I915_GEM_DOMAIN_RENDER);
664 }
665
666 /**
667 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
668 */
669 static void
670 brw_update_renderbuffer_surfaces(struct brw_context *brw)
671 {
672 struct gl_context *ctx = &brw->ctx;
673 GLuint i;
674
675 /* _NEW_BUFFERS | _NEW_COLOR */
676 /* Update surfaces for drawing buffers */
677 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
678 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
679 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
680 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
681 ctx->DrawBuffer->Layered, i);
682 } else {
683 brw->vtbl.update_null_renderbuffer_surface(brw, i);
684 }
685 }
686 } else {
687 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
688 }
689 brw->state.dirty.brw |= BRW_NEW_SURFACES;
690 }
691
692 const struct brw_tracked_state brw_renderbuffer_surfaces = {
693 .dirty = {
694 .mesa = (_NEW_COLOR |
695 _NEW_BUFFERS),
696 .brw = BRW_NEW_BATCH,
697 .cache = 0
698 },
699 .emit = brw_update_renderbuffer_surfaces,
700 };
701
702 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
703 .dirty = {
704 .mesa = _NEW_BUFFERS,
705 .brw = BRW_NEW_BATCH,
706 .cache = 0
707 },
708 .emit = brw_update_renderbuffer_surfaces,
709 };
710
711
712 static void
713 update_stage_texture_surfaces(struct brw_context *brw,
714 const struct gl_program *prog,
715 uint32_t *surf_offset)
716 {
717 if (!prog)
718 return;
719
720 struct gl_context *ctx = &brw->ctx;
721
722 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
723
724 for (unsigned s = 0; s < num_samplers; s++) {
725 surf_offset[s] = 0;
726
727 if (prog->SamplersUsed & (1 << s)) {
728 const unsigned unit = prog->SamplerUnits[s];
729
730 /* _NEW_TEXTURE */
731 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
732 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s);
733 }
734 }
735 }
736 }
737
738
739 /**
740 * Construct SURFACE_STATE objects for enabled textures.
741 */
742 static void
743 brw_update_texture_surfaces(struct brw_context *brw)
744 {
745 /* BRW_NEW_VERTEX_PROGRAM */
746 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
747
748 /* BRW_NEW_GEOMETRY_PROGRAM */
749 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
750
751 /* BRW_NEW_FRAGMENT_PROGRAM */
752 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
753
754 /* _NEW_TEXTURE */
755 update_stage_texture_surfaces(brw, vs,
756 brw->vs.base.surf_offset +
757 SURF_INDEX_VEC4_TEXTURE(0));
758 update_stage_texture_surfaces(brw, gs,
759 brw->gs.base.surf_offset +
760 SURF_INDEX_VEC4_TEXTURE(0));
761 update_stage_texture_surfaces(brw, fs,
762 brw->wm.base.surf_offset +
763 SURF_INDEX_TEXTURE(0));
764
765 brw->state.dirty.brw |= BRW_NEW_SURFACES;
766 }
767
768 const struct brw_tracked_state brw_texture_surfaces = {
769 .dirty = {
770 .mesa = _NEW_TEXTURE,
771 .brw = BRW_NEW_BATCH |
772 BRW_NEW_VERTEX_PROGRAM |
773 BRW_NEW_GEOMETRY_PROGRAM |
774 BRW_NEW_FRAGMENT_PROGRAM,
775 .cache = 0
776 },
777 .emit = brw_update_texture_surfaces,
778 };
779
780 void
781 brw_upload_ubo_surfaces(struct brw_context *brw,
782 struct gl_shader *shader,
783 uint32_t *surf_offsets)
784 {
785 struct gl_context *ctx = &brw->ctx;
786
787 if (!shader)
788 return;
789
790 for (int i = 0; i < shader->NumUniformBlocks; i++) {
791 struct gl_uniform_buffer_binding *binding;
792 struct intel_buffer_object *intel_bo;
793
794 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
795 intel_bo = intel_buffer_object(binding->BufferObject);
796 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
797
798 /* Because behavior for referencing outside of the binding's size in the
799 * glBindBufferRange case is undefined, we can just bind the whole buffer
800 * glBindBufferBase wants and be a correct implementation.
801 */
802 brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
803 bo->size - binding->Offset,
804 &surf_offsets[i],
805 shader->Type == GL_FRAGMENT_SHADER);
806 }
807
808 if (shader->NumUniformBlocks)
809 brw->state.dirty.brw |= BRW_NEW_SURFACES;
810 }
811
812 static void
813 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
814 {
815 struct gl_context *ctx = &brw->ctx;
816 /* _NEW_PROGRAM */
817 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
818
819 if (!prog)
820 return;
821
822 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
823 &brw->wm.base.surf_offset[SURF_INDEX_WM_UBO(0)]);
824 }
825
826 const struct brw_tracked_state brw_wm_ubo_surfaces = {
827 .dirty = {
828 .mesa = _NEW_PROGRAM,
829 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
830 .cache = 0,
831 },
832 .emit = brw_upload_wm_ubo_surfaces,
833 };
834
835 void
836 gen4_init_vtable_surface_functions(struct brw_context *brw)
837 {
838 brw->vtbl.update_texture_surface = brw_update_texture_surface;
839 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
840 brw->vtbl.update_null_renderbuffer_surface =
841 brw_update_null_renderbuffer_surface;
842 brw->vtbl.create_constant_surface = brw_create_constant_surface;
843 }