i965: Enable L3 caching of buffer surfaces.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 unreachable("not reached");
77 }
78 }
79
80 uint32_t
81 brw_get_surface_tiling_bits(uint32_t tiling)
82 {
83 switch (tiling) {
84 case I915_TILING_X:
85 return BRW_SURFACE_TILED;
86 case I915_TILING_Y:
87 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
88 default:
89 return 0;
90 }
91 }
92
93
94 uint32_t
95 brw_get_surface_num_multisamples(unsigned num_samples)
96 {
97 if (num_samples > 1)
98 return BRW_SURFACE_MULTISAMPLECOUNT_4;
99 else
100 return BRW_SURFACE_MULTISAMPLECOUNT_1;
101 }
102
103 void
104 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
105 bool is_render_target,
106 unsigned *width, unsigned *height,
107 unsigned *pitch, uint32_t *tiling, unsigned *format)
108 {
109 static const unsigned halign_stencil = 8;
110
111 /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
112 * there are half as many rows.
113 * In addition, mip-levels are accessed manually by the program and
114 * therefore the surface is setup to cover all the mip-levels for one slice.
115 * (Hardware is still used to access individual slices).
116 */
117 *tiling = I915_TILING_Y;
118 *pitch = mt->pitch * 2;
119 *width = ALIGN(mt->total_width, halign_stencil) * 2;
120 *height = (mt->total_height / mt->physical_depth0) / 2;
121
122 if (is_render_target) {
123 *format = BRW_SURFACEFORMAT_R8_UINT;
124 }
125 }
126
127
128 /**
129 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
130 * swizzling.
131 */
132 int
133 brw_get_texture_swizzle(const struct gl_context *ctx,
134 const struct gl_texture_object *t)
135 {
136 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
137
138 int swizzles[SWIZZLE_NIL + 1] = {
139 SWIZZLE_X,
140 SWIZZLE_Y,
141 SWIZZLE_Z,
142 SWIZZLE_W,
143 SWIZZLE_ZERO,
144 SWIZZLE_ONE,
145 SWIZZLE_NIL
146 };
147
148 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
149 img->_BaseFormat == GL_DEPTH_STENCIL) {
150 GLenum depth_mode = t->DepthMode;
151
152 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
153 * with depth component data specified with a sized internal format.
154 * Otherwise, it's left at the old default, GL_LUMINANCE.
155 */
156 if (_mesa_is_gles3(ctx) &&
157 img->InternalFormat != GL_DEPTH_COMPONENT &&
158 img->InternalFormat != GL_DEPTH_STENCIL) {
159 depth_mode = GL_RED;
160 }
161
162 switch (depth_mode) {
163 case GL_ALPHA:
164 swizzles[0] = SWIZZLE_ZERO;
165 swizzles[1] = SWIZZLE_ZERO;
166 swizzles[2] = SWIZZLE_ZERO;
167 swizzles[3] = SWIZZLE_X;
168 break;
169 case GL_LUMINANCE:
170 swizzles[0] = SWIZZLE_X;
171 swizzles[1] = SWIZZLE_X;
172 swizzles[2] = SWIZZLE_X;
173 swizzles[3] = SWIZZLE_ONE;
174 break;
175 case GL_INTENSITY:
176 swizzles[0] = SWIZZLE_X;
177 swizzles[1] = SWIZZLE_X;
178 swizzles[2] = SWIZZLE_X;
179 swizzles[3] = SWIZZLE_X;
180 break;
181 case GL_RED:
182 swizzles[0] = SWIZZLE_X;
183 swizzles[1] = SWIZZLE_ZERO;
184 swizzles[2] = SWIZZLE_ZERO;
185 swizzles[3] = SWIZZLE_ONE;
186 break;
187 }
188 }
189
190 /* If the texture's format is alpha-only, force R, G, and B to
191 * 0.0. Similarly, if the texture's format has no alpha channel,
192 * force the alpha value read to 1.0. This allows for the
193 * implementation to use an RGBA texture for any of these formats
194 * without leaking any unexpected values.
195 */
196 switch (img->_BaseFormat) {
197 case GL_ALPHA:
198 swizzles[0] = SWIZZLE_ZERO;
199 swizzles[1] = SWIZZLE_ZERO;
200 swizzles[2] = SWIZZLE_ZERO;
201 break;
202 case GL_RED:
203 case GL_RG:
204 case GL_RGB:
205 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
206 swizzles[3] = SWIZZLE_ONE;
207 break;
208 }
209
210 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
211 swizzles[GET_SWZ(t->_Swizzle, 1)],
212 swizzles[GET_SWZ(t->_Swizzle, 2)],
213 swizzles[GET_SWZ(t->_Swizzle, 3)]);
214 }
215
216 static void
217 gen4_emit_buffer_surface_state(struct brw_context *brw,
218 uint32_t *out_offset,
219 drm_intel_bo *bo,
220 unsigned buffer_offset,
221 unsigned surface_format,
222 unsigned buffer_size,
223 unsigned pitch,
224 bool rw)
225 {
226 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
227 6 * 4, 32, out_offset);
228 memset(surf, 0, 6 * 4);
229
230 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
231 surface_format << BRW_SURFACE_FORMAT_SHIFT |
232 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
233 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
234 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
235 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
236 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
237 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
238
239 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
240 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
241 * physical cache. It is mapped in hardware to the sampler cache."
242 */
243 if (bo) {
244 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
245 bo, buffer_offset,
246 I915_GEM_DOMAIN_SAMPLER,
247 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
248 }
249 }
250
251 void
252 brw_update_buffer_texture_surface(struct gl_context *ctx,
253 unsigned unit,
254 uint32_t *surf_offset)
255 {
256 struct brw_context *brw = brw_context(ctx);
257 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
258 struct intel_buffer_object *intel_obj =
259 intel_buffer_object(tObj->BufferObject);
260 uint32_t size = tObj->BufferSize;
261 drm_intel_bo *bo = NULL;
262 mesa_format format = tObj->_BufferObjectFormat;
263 uint32_t brw_format = brw_format_for_mesa_format(format);
264 int texel_size = _mesa_get_format_bytes(format);
265
266 if (intel_obj) {
267 size = MIN2(size, intel_obj->Base.Size);
268 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
269 }
270
271 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
272 _mesa_problem(NULL, "bad format %s for texture buffer\n",
273 _mesa_get_format_name(format));
274 }
275
276 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
277 tObj->BufferOffset,
278 brw_format,
279 size / texel_size,
280 texel_size,
281 false /* rw */);
282 }
283
284 static void
285 brw_update_texture_surface(struct gl_context *ctx,
286 unsigned unit,
287 uint32_t *surf_offset,
288 bool for_gather)
289 {
290 struct brw_context *brw = brw_context(ctx);
291 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
292 struct intel_texture_object *intelObj = intel_texture_object(tObj);
293 struct intel_mipmap_tree *mt = intelObj->mt;
294 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
295 uint32_t *surf;
296
297 /* BRW_NEW_TEXTURE_BUFFER */
298 if (tObj->Target == GL_TEXTURE_BUFFER) {
299 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
300 return;
301 }
302
303 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
304 6 * 4, 32, surf_offset);
305
306 uint32_t tex_format = translate_tex_format(brw, mt->format,
307 sampler->sRGBDecode);
308
309 if (for_gather) {
310 /* Sandybridge's gather4 message is broken for integer formats.
311 * To work around this, we pretend the surface is UNORM for
312 * 8 or 16-bit formats, and emit shader instructions to recover
313 * the real INT/UINT value. For 32-bit formats, we pretend
314 * the surface is FLOAT, and simply reinterpret the resulting
315 * bits.
316 */
317 switch (tex_format) {
318 case BRW_SURFACEFORMAT_R8_SINT:
319 case BRW_SURFACEFORMAT_R8_UINT:
320 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
321 break;
322
323 case BRW_SURFACEFORMAT_R16_SINT:
324 case BRW_SURFACEFORMAT_R16_UINT:
325 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
326 break;
327
328 case BRW_SURFACEFORMAT_R32_SINT:
329 case BRW_SURFACEFORMAT_R32_UINT:
330 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
331 break;
332
333 default:
334 break;
335 }
336 }
337
338 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
339 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
340 BRW_SURFACE_CUBEFACE_ENABLES |
341 tex_format << BRW_SURFACE_FORMAT_SHIFT);
342
343 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
344
345 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
346 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
347 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
348
349 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
350 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
351 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
352
353 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
354 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
355
356 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
357
358 /* Emit relocation to surface contents */
359 drm_intel_bo_emit_reloc(brw->batch.bo,
360 *surf_offset + 4,
361 mt->bo,
362 surf[1] - mt->bo->offset64,
363 I915_GEM_DOMAIN_SAMPLER, 0);
364 }
365
366 /**
367 * Create the constant buffer surface. Vertex/fragment shader constants will be
368 * read from this buffer with Data Port Read instructions/messages.
369 */
370 void
371 brw_create_constant_surface(struct brw_context *brw,
372 drm_intel_bo *bo,
373 uint32_t offset,
374 uint32_t size,
375 uint32_t *out_offset,
376 bool dword_pitch)
377 {
378 uint32_t stride = dword_pitch ? 4 : 16;
379 uint32_t elements = ALIGN(size, stride) / stride;
380
381 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
382 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
383 elements, stride, false);
384 }
385
386 /**
387 * Set up a binding table entry for use by stream output logic (transform
388 * feedback).
389 *
390 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
391 */
392 void
393 brw_update_sol_surface(struct brw_context *brw,
394 struct gl_buffer_object *buffer_obj,
395 uint32_t *out_offset, unsigned num_vector_components,
396 unsigned stride_dwords, unsigned offset_dwords)
397 {
398 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
399 uint32_t offset_bytes = 4 * offset_dwords;
400 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
401 offset_bytes,
402 buffer_obj->Size - offset_bytes);
403 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
404 out_offset);
405 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
406 size_t size_dwords = buffer_obj->Size / 4;
407 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
408
409 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
410 * too big to map using a single binding table entry?
411 */
412 assert((size_dwords - offset_dwords) / stride_dwords
413 <= BRW_MAX_NUM_BUFFER_ENTRIES);
414
415 if (size_dwords > offset_dwords + num_vector_components) {
416 /* There is room for at least 1 transform feedback output in the buffer.
417 * Compute the number of additional transform feedback outputs the
418 * buffer has room for.
419 */
420 buffer_size_minus_1 =
421 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
422 } else {
423 /* There isn't even room for a single transform feedback output in the
424 * buffer. We can't configure the binding table entry to prevent output
425 * entirely; we'll have to rely on the geometry shader to detect
426 * overflow. But to minimize the damage in case of a bug, set up the
427 * binding table entry to just allow a single output.
428 */
429 buffer_size_minus_1 = 0;
430 }
431 width = buffer_size_minus_1 & 0x7f;
432 height = (buffer_size_minus_1 & 0xfff80) >> 7;
433 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
434
435 switch (num_vector_components) {
436 case 1:
437 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
438 break;
439 case 2:
440 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
441 break;
442 case 3:
443 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
444 break;
445 case 4:
446 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
447 break;
448 default:
449 unreachable("Invalid vector size for transform feedback output");
450 }
451
452 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
453 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
454 surface_format << BRW_SURFACE_FORMAT_SHIFT |
455 BRW_SURFACE_RC_READ_WRITE;
456 surf[1] = bo->offset64 + offset_bytes; /* reloc */
457 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
458 height << BRW_SURFACE_HEIGHT_SHIFT);
459 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
460 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
461 surf[4] = 0;
462 surf[5] = 0;
463
464 /* Emit relocation to surface contents. */
465 drm_intel_bo_emit_reloc(brw->batch.bo,
466 *out_offset + 4,
467 bo, offset_bytes,
468 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
469 }
470
471 /* Creates a new WM constant buffer reflecting the current fragment program's
472 * constants, if needed by the fragment program.
473 *
474 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
475 * state atom.
476 */
477 static void
478 brw_upload_wm_pull_constants(struct brw_context *brw)
479 {
480 struct brw_stage_state *stage_state = &brw->wm.base;
481 /* BRW_NEW_FRAGMENT_PROGRAM */
482 struct brw_fragment_program *fp =
483 (struct brw_fragment_program *) brw->fragment_program;
484 /* BRW_NEW_FS_PROG_DATA */
485 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
486
487 /* _NEW_PROGRAM_CONSTANTS */
488 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
489 stage_state, prog_data, true);
490 }
491
492 const struct brw_tracked_state brw_wm_pull_constants = {
493 .dirty = {
494 .mesa = _NEW_PROGRAM_CONSTANTS,
495 .brw = BRW_NEW_BATCH |
496 BRW_NEW_FRAGMENT_PROGRAM |
497 BRW_NEW_FS_PROG_DATA,
498 },
499 .emit = brw_upload_wm_pull_constants,
500 };
501
502 /**
503 * Creates a null renderbuffer surface.
504 *
505 * This is used when the shader doesn't write to any color output. An FB
506 * write to target 0 will still be emitted, because that's how the thread is
507 * terminated (and computed depth is returned), so we need to have the
508 * hardware discard the target 0 color output..
509 */
510 static void
511 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
512 {
513 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
514 * Notes):
515 *
516 * A null surface will be used in instances where an actual surface is
517 * not bound. When a write message is generated to a null surface, no
518 * actual surface is written to. When a read message (including any
519 * sampling engine message) is generated to a null surface, the result
520 * is all zeros. Note that a null surface type is allowed to be used
521 * with all messages, even if it is not specificially indicated as
522 * supported. All of the remaining fields in surface state are ignored
523 * for null surfaces, with the following exceptions:
524 *
525 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
526 * depth buffer’s corresponding state for all render target surfaces,
527 * including null.
528 *
529 * - Surface Format must be R8G8B8A8_UNORM.
530 */
531 struct gl_context *ctx = &brw->ctx;
532 uint32_t *surf;
533 unsigned surface_type = BRW_SURFACE_NULL;
534 drm_intel_bo *bo = NULL;
535 unsigned pitch_minus_1 = 0;
536 uint32_t multisampling_state = 0;
537 /* BRW_NEW_FS_PROG_DATA */
538 uint32_t surf_index =
539 brw->wm.prog_data->binding_table.render_target_start + unit;
540
541 /* _NEW_BUFFERS */
542 const struct gl_framebuffer *fb = ctx->DrawBuffer;
543
544 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
545 &brw->wm.base.surf_offset[surf_index]);
546
547 if (fb->Visual.samples > 1) {
548 /* On Gen6, null render targets seem to cause GPU hangs when
549 * multisampling. So work around this problem by rendering into dummy
550 * color buffer.
551 *
552 * To decrease the amount of memory needed by the workaround buffer, we
553 * set its pitch to 128 bytes (the width of a Y tile). This means that
554 * the amount of memory needed for the workaround buffer is
555 * (width_in_tiles + height_in_tiles - 1) tiles.
556 *
557 * Note that since the workaround buffer will be interpreted by the
558 * hardware as an interleaved multisampled buffer, we need to compute
559 * width_in_tiles and height_in_tiles by dividing the width and height
560 * by 16 rather than the normal Y-tile size of 32.
561 */
562 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
563 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
564 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
565 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
566 size_needed);
567 bo = brw->wm.multisampled_null_render_target_bo;
568 surface_type = BRW_SURFACE_2D;
569 pitch_minus_1 = 127;
570 multisampling_state =
571 brw_get_surface_num_multisamples(fb->Visual.samples);
572 }
573
574 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
575 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
576 if (brw->gen < 6) {
577 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
578 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
579 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
580 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
581 }
582 surf[1] = bo ? bo->offset64 : 0;
583 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
584 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
585
586 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
587 * Notes):
588 *
589 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
590 */
591 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
592 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
593 surf[4] = multisampling_state;
594 surf[5] = 0;
595
596 if (bo) {
597 drm_intel_bo_emit_reloc(brw->batch.bo,
598 brw->wm.base.surf_offset[surf_index] + 4,
599 bo, 0,
600 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
601 }
602 }
603
604 /**
605 * Sets up a surface state structure to point at the given region.
606 * While it is only used for the front/back buffer currently, it should be
607 * usable for further buffers when doing ARB_draw_buffer support.
608 */
609 static void
610 brw_update_renderbuffer_surface(struct brw_context *brw,
611 struct gl_renderbuffer *rb,
612 bool layered,
613 unsigned int unit)
614 {
615 struct gl_context *ctx = &brw->ctx;
616 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
617 struct intel_mipmap_tree *mt = irb->mt;
618 uint32_t *surf;
619 uint32_t tile_x, tile_y;
620 uint32_t format = 0;
621 /* _NEW_BUFFERS */
622 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
623 /* BRW_NEW_FS_PROG_DATA */
624 uint32_t surf_index =
625 brw->wm.prog_data->binding_table.render_target_start + unit;
626
627 assert(!layered);
628
629 if (rb->TexImage && !brw->has_surface_tile_offset) {
630 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
631
632 if (tile_x != 0 || tile_y != 0) {
633 /* Original gen4 hardware couldn't draw to a non-tile-aligned
634 * destination in a miptree unless you actually setup your renderbuffer
635 * as a miptree and used the fragile lod/array_index/etc. controls to
636 * select the image. So, instead, we just make a new single-level
637 * miptree and render into that.
638 */
639 intel_renderbuffer_move_to_temp(brw, irb, false);
640 mt = irb->mt;
641 }
642 }
643
644 intel_miptree_used_for_rendering(irb->mt);
645
646 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
647 &brw->wm.base.surf_offset[surf_index]);
648
649 format = brw->render_target_format[rb_format];
650 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
651 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
652 __FUNCTION__, _mesa_get_format_name(rb_format));
653 }
654
655 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
656 format << BRW_SURFACE_FORMAT_SHIFT);
657
658 /* reloc */
659 assert(mt->offset % mt->cpp == 0);
660 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
661 mt->bo->offset64 + mt->offset);
662
663 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
664 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
665
666 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
667 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
668
669 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
670
671 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
672 /* Note that the low bits of these fields are missing, so
673 * there's the possibility of getting in trouble.
674 */
675 assert(tile_x % 4 == 0);
676 assert(tile_y % 2 == 0);
677 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
678 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
679 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
680
681 if (brw->gen < 6) {
682 /* _NEW_COLOR */
683 if (!ctx->Color.ColorLogicOpEnabled &&
684 (ctx->Color.BlendEnabled & (1 << unit)))
685 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
686
687 if (!ctx->Color.ColorMask[unit][0])
688 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
689 if (!ctx->Color.ColorMask[unit][1])
690 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
691 if (!ctx->Color.ColorMask[unit][2])
692 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
693
694 /* As mentioned above, disable writes to the alpha component when the
695 * renderbuffer is XRGB.
696 */
697 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
698 !ctx->Color.ColorMask[unit][3]) {
699 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
700 }
701 }
702
703 drm_intel_bo_emit_reloc(brw->batch.bo,
704 brw->wm.base.surf_offset[surf_index] + 4,
705 mt->bo,
706 surf[1] - mt->bo->offset64,
707 I915_GEM_DOMAIN_RENDER,
708 I915_GEM_DOMAIN_RENDER);
709 }
710
711 /**
712 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
713 */
714 static void
715 brw_update_renderbuffer_surfaces(struct brw_context *brw)
716 {
717 struct gl_context *ctx = &brw->ctx;
718 GLuint i;
719
720 /* _NEW_BUFFERS | _NEW_COLOR */
721 /* Update surfaces for drawing buffers */
722 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
723 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
724 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
725 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
726 ctx->DrawBuffer->MaxNumLayers > 0, i);
727 } else {
728 brw->vtbl.update_null_renderbuffer_surface(brw, i);
729 }
730 }
731 } else {
732 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
733 }
734 brw->state.dirty.brw |= BRW_NEW_SURFACES;
735 }
736
737 const struct brw_tracked_state brw_renderbuffer_surfaces = {
738 .dirty = {
739 .mesa = _NEW_BUFFERS |
740 _NEW_COLOR,
741 .brw = BRW_NEW_BATCH |
742 BRW_NEW_FS_PROG_DATA,
743 },
744 .emit = brw_update_renderbuffer_surfaces,
745 };
746
747 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
748 .dirty = {
749 .mesa = _NEW_BUFFERS,
750 .brw = BRW_NEW_BATCH,
751 },
752 .emit = brw_update_renderbuffer_surfaces,
753 };
754
755
756 static void
757 update_stage_texture_surfaces(struct brw_context *brw,
758 const struct gl_program *prog,
759 struct brw_stage_state *stage_state,
760 bool for_gather)
761 {
762 if (!prog)
763 return;
764
765 struct gl_context *ctx = &brw->ctx;
766
767 uint32_t *surf_offset = stage_state->surf_offset;
768
769 /* BRW_NEW_*_PROG_DATA */
770 if (for_gather)
771 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
772 else
773 surf_offset += stage_state->prog_data->binding_table.texture_start;
774
775 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
776 for (unsigned s = 0; s < num_samplers; s++) {
777 surf_offset[s] = 0;
778
779 if (prog->SamplersUsed & (1 << s)) {
780 const unsigned unit = prog->SamplerUnits[s];
781
782 /* _NEW_TEXTURE */
783 if (ctx->Texture.Unit[unit]._Current) {
784 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
785 }
786 }
787 }
788 }
789
790
791 /**
792 * Construct SURFACE_STATE objects for enabled textures.
793 */
794 static void
795 brw_update_texture_surfaces(struct brw_context *brw)
796 {
797 /* BRW_NEW_VERTEX_PROGRAM */
798 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
799
800 /* BRW_NEW_GEOMETRY_PROGRAM */
801 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
802
803 /* BRW_NEW_FRAGMENT_PROGRAM */
804 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
805
806 /* _NEW_TEXTURE */
807 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
808 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
809 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
810
811 /* emit alternate set of surface state for gather. this
812 * allows the surface format to be overriden for only the
813 * gather4 messages. */
814 if (brw->gen < 8) {
815 if (vs && vs->UsesGather)
816 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
817 if (gs && gs->UsesGather)
818 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
819 if (fs && fs->UsesGather)
820 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
821 }
822
823 brw->state.dirty.brw |= BRW_NEW_SURFACES;
824 }
825
826 const struct brw_tracked_state brw_texture_surfaces = {
827 .dirty = {
828 .mesa = _NEW_TEXTURE,
829 .brw = BRW_NEW_BATCH |
830 BRW_NEW_FRAGMENT_PROGRAM |
831 BRW_NEW_FS_PROG_DATA |
832 BRW_NEW_GEOMETRY_PROGRAM |
833 BRW_NEW_GS_PROG_DATA |
834 BRW_NEW_TEXTURE_BUFFER |
835 BRW_NEW_VERTEX_PROGRAM |
836 BRW_NEW_VS_PROG_DATA,
837 },
838 .emit = brw_update_texture_surfaces,
839 };
840
841 void
842 brw_upload_ubo_surfaces(struct brw_context *brw,
843 struct gl_shader *shader,
844 struct brw_stage_state *stage_state,
845 struct brw_stage_prog_data *prog_data,
846 bool dword_pitch)
847 {
848 struct gl_context *ctx = &brw->ctx;
849
850 if (!shader)
851 return;
852
853 uint32_t *surf_offsets =
854 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
855
856 for (int i = 0; i < shader->NumUniformBlocks; i++) {
857 struct gl_uniform_buffer_binding *binding;
858 struct intel_buffer_object *intel_bo;
859
860 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
861 intel_bo = intel_buffer_object(binding->BufferObject);
862 drm_intel_bo *bo =
863 intel_bufferobj_buffer(brw, intel_bo,
864 binding->Offset,
865 binding->BufferObject->Size - binding->Offset);
866
867 /* Because behavior for referencing outside of the binding's size in the
868 * glBindBufferRange case is undefined, we can just bind the whole buffer
869 * glBindBufferBase wants and be a correct implementation.
870 */
871 brw_create_constant_surface(brw, bo, binding->Offset,
872 bo->size - binding->Offset,
873 &surf_offsets[i],
874 dword_pitch);
875 }
876
877 if (shader->NumUniformBlocks)
878 brw->state.dirty.brw |= BRW_NEW_SURFACES;
879 }
880
881 static void
882 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
883 {
884 struct gl_context *ctx = &brw->ctx;
885 /* _NEW_PROGRAM */
886 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
887
888 if (!prog)
889 return;
890
891 /* BRW_NEW_FS_PROG_DATA */
892 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
893 &brw->wm.base, &brw->wm.prog_data->base, true);
894 }
895
896 const struct brw_tracked_state brw_wm_ubo_surfaces = {
897 .dirty = {
898 .mesa = _NEW_PROGRAM,
899 .brw = BRW_NEW_BATCH |
900 BRW_NEW_FS_PROG_DATA |
901 BRW_NEW_UNIFORM_BUFFER,
902 },
903 .emit = brw_upload_wm_ubo_surfaces,
904 };
905
906 void
907 brw_upload_abo_surfaces(struct brw_context *brw,
908 struct gl_shader_program *prog,
909 struct brw_stage_state *stage_state,
910 struct brw_stage_prog_data *prog_data)
911 {
912 struct gl_context *ctx = &brw->ctx;
913 uint32_t *surf_offsets =
914 &stage_state->surf_offset[prog_data->binding_table.abo_start];
915
916 for (int i = 0; i < prog->NumAtomicBuffers; i++) {
917 struct gl_atomic_buffer_binding *binding =
918 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
919 struct intel_buffer_object *intel_bo =
920 intel_buffer_object(binding->BufferObject);
921 drm_intel_bo *bo = intel_bufferobj_buffer(
922 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
923
924 brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
925 bo->size - binding->Offset,
926 &surf_offsets[i], true);
927 }
928
929 if (prog->NumAtomicBuffers)
930 brw->state.dirty.brw |= BRW_NEW_SURFACES;
931 }
932
933 static void
934 brw_upload_wm_abo_surfaces(struct brw_context *brw)
935 {
936 struct gl_context *ctx = &brw->ctx;
937 /* _NEW_PROGRAM */
938 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
939
940 if (prog) {
941 /* BRW_NEW_FS_PROG_DATA */
942 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
943 &brw->wm.prog_data->base);
944 }
945 }
946
947 const struct brw_tracked_state brw_wm_abo_surfaces = {
948 .dirty = {
949 .mesa = _NEW_PROGRAM,
950 .brw = BRW_NEW_ATOMIC_BUFFER |
951 BRW_NEW_BATCH |
952 BRW_NEW_FS_PROG_DATA,
953 },
954 .emit = brw_upload_wm_abo_surfaces,
955 };
956
957 void
958 gen4_init_vtable_surface_functions(struct brw_context *brw)
959 {
960 brw->vtbl.update_texture_surface = brw_update_texture_surface;
961 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
962 brw->vtbl.update_null_renderbuffer_surface =
963 brw_update_null_renderbuffer_surface;
964 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
965 }