i965: Override swizzles for integer luminance formats.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 unreachable("not reached");
77 }
78 }
79
80 uint32_t
81 brw_get_surface_tiling_bits(uint32_t tiling)
82 {
83 switch (tiling) {
84 case I915_TILING_X:
85 return BRW_SURFACE_TILED;
86 case I915_TILING_Y:
87 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
88 default:
89 return 0;
90 }
91 }
92
93
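/* The gen4-6 SURFACE_STATE "Number of Multisamples" field only distinguishes
 * single-sampled from 4x surfaces, so any multisampled miptree is reported
 * as 4x here.
 */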
94 uint32_t
95 brw_get_surface_num_multisamples(unsigned num_samples)
96 {
97 if (num_samples > 1)
98 return BRW_SURFACE_MULTISAMPLECOUNT_4;
99 else
100 return BRW_SURFACE_MULTISAMPLECOUNT_1;
101 }
102
103 void
104 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
105 bool is_render_target,
106 unsigned *width, unsigned *height,
107 unsigned *pitch, uint32_t *tiling, unsigned *format)
108 {
109 static const unsigned halign_stencil = 8;
110
111 /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
112 * there are half as many rows.
113 * In addition, mip-levels are accessed manually by the program and
114 * therefore the surface is set up to cover all the mip-levels for one slice.
115 * (Hardware is still used to access individual slices.)
116 */
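/* For example (hypothetical numbers): a stencil miptree with pitch 64,
 * total_width 64, total_height 128 and physical_depth0 1 would be exposed
 * as a Y-tiled surface with pitch 128, width ALIGN(64, 8) * 2 = 128 and
 * height (128 / 1) / 2 = 64.
 */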
117 *tiling = I915_TILING_Y;
118 *pitch = mt->pitch * 2;
119 *width = ALIGN(mt->total_width, halign_stencil) * 2;
120 *height = (mt->total_height / mt->physical_depth0) / 2;
121
122 if (is_render_target) {
123 *format = BRW_SURFACEFORMAT_R8_UINT;
124 }
125 }
126
127
128 /**
129 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
130 * swizzling.
131 */
132 int
133 brw_get_texture_swizzle(const struct gl_context *ctx,
134 const struct gl_texture_object *t)
135 {
136 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
137
138 int swizzles[SWIZZLE_NIL + 1] = {
139 SWIZZLE_X,
140 SWIZZLE_Y,
141 SWIZZLE_Z,
142 SWIZZLE_W,
143 SWIZZLE_ZERO,
144 SWIZZLE_ONE,
145 SWIZZLE_NIL
146 };
147
148 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
149 img->_BaseFormat == GL_DEPTH_STENCIL) {
150 GLenum depth_mode = t->DepthMode;
151
152 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
153 * with depth component data specified with a sized internal format.
154 * Otherwise, it's left at the old default, GL_LUMINANCE.
155 */
156 if (_mesa_is_gles3(ctx) &&
157 img->InternalFormat != GL_DEPTH_COMPONENT &&
158 img->InternalFormat != GL_DEPTH_STENCIL) {
159 depth_mode = GL_RED;
160 }
161
162 switch (depth_mode) {
163 case GL_ALPHA:
164 swizzles[0] = SWIZZLE_ZERO;
165 swizzles[1] = SWIZZLE_ZERO;
166 swizzles[2] = SWIZZLE_ZERO;
167 swizzles[3] = SWIZZLE_X;
168 break;
169 case GL_LUMINANCE:
170 swizzles[0] = SWIZZLE_X;
171 swizzles[1] = SWIZZLE_X;
172 swizzles[2] = SWIZZLE_X;
173 swizzles[3] = SWIZZLE_ONE;
174 break;
175 case GL_INTENSITY:
176 swizzles[0] = SWIZZLE_X;
177 swizzles[1] = SWIZZLE_X;
178 swizzles[2] = SWIZZLE_X;
179 swizzles[3] = SWIZZLE_X;
180 break;
181 case GL_RED:
182 swizzles[0] = SWIZZLE_X;
183 swizzles[1] = SWIZZLE_ZERO;
184 swizzles[2] = SWIZZLE_ZERO;
185 swizzles[3] = SWIZZLE_ONE;
186 break;
187 }
188 }
189
190 /* If the texture's format is alpha-only, force R, G, and B to
191 * 0.0. Similarly, if the texture's format has no alpha channel,
192 * force the alpha value read to 1.0. This allows the
193 * implementation to use an RGBA texture for any of these formats
194 * without leaking any unexpected values.
195 */
196 switch (img->_BaseFormat) {
197 case GL_ALPHA:
198 swizzles[0] = SWIZZLE_ZERO;
199 swizzles[1] = SWIZZLE_ZERO;
200 swizzles[2] = SWIZZLE_ZERO;
201 break;
202 case GL_LUMINANCE:
203 if (t->_IsIntegerFormat) {
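/* The hardware has no SINT/UINT luminance surface formats, so integer
 * luminance textures are sampled through a single-channel (red) format;
 * replicate red into RGB here and force alpha to one.
 */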
204 swizzles[0] = SWIZZLE_X;
205 swizzles[1] = SWIZZLE_X;
206 swizzles[2] = SWIZZLE_X;
207 swizzles[3] = SWIZZLE_ONE;
208 }
209 break;
210 case GL_RED:
211 case GL_RG:
212 case GL_RGB:
213 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
214 swizzles[3] = SWIZZLE_ONE;
215 break;
216 }
217
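/* Compose the application's texture swizzle with the format-based swizzle
 * computed above. For example (hypothetical): a depth texture with
 * DEPTH_TEXTURE_MODE GL_LUMINANCE yields (X, X, X, ONE) above; if the
 * application then sets a texture swizzle of (A, R, G, B), the final
 * swizzle is (ONE, X, X, X).
 */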
218 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
219 swizzles[GET_SWZ(t->_Swizzle, 1)],
220 swizzles[GET_SWZ(t->_Swizzle, 2)],
221 swizzles[GET_SWZ(t->_Swizzle, 3)]);
222 }
223
224 static void
225 gen4_emit_buffer_surface_state(struct brw_context *brw,
226 uint32_t *out_offset,
227 drm_intel_bo *bo,
228 unsigned buffer_offset,
229 unsigned surface_format,
230 unsigned buffer_size,
231 unsigned pitch,
232 bool rw)
233 {
234 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
235 6 * 4, 32, out_offset);
236 memset(surf, 0, 6 * 4);
237
238 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
239 surface_format << BRW_SURFACE_FORMAT_SHIFT |
240 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
241 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
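/* The element count is split across the Width (bits 6:0), Height
 * (bits 19:7) and Depth (bits 26:20) fields of the buffer SURFACE_STATE.
 */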
242 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
243 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
244 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
245 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
246
247 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
248 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
249 * physical cache. It is mapped in hardware to the sampler cache."
250 */
251 if (bo) {
252 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
253 bo, buffer_offset,
254 I915_GEM_DOMAIN_SAMPLER,
255 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
256 }
257 }
258
259 void
260 brw_update_buffer_texture_surface(struct gl_context *ctx,
261 unsigned unit,
262 uint32_t *surf_offset)
263 {
264 struct brw_context *brw = brw_context(ctx);
265 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
266 struct intel_buffer_object *intel_obj =
267 intel_buffer_object(tObj->BufferObject);
268 uint32_t size = tObj->BufferSize;
269 drm_intel_bo *bo = NULL;
270 mesa_format format = tObj->_BufferObjectFormat;
271 uint32_t brw_format = brw_format_for_mesa_format(format);
272 int texel_size = _mesa_get_format_bytes(format);
273
274 if (intel_obj) {
275 size = MIN2(size, intel_obj->Base.Size);
276 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
277 }
278
279 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
280 _mesa_problem(NULL, "bad format %s for texture buffer\n",
281 _mesa_get_format_name(format));
282 }
283
284 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
285 tObj->BufferOffset,
286 brw_format,
287 size / texel_size,
288 texel_size,
289 false /* rw */);
290 }
291
292 static void
293 brw_update_texture_surface(struct gl_context *ctx,
294 unsigned unit,
295 uint32_t *surf_offset,
296 bool for_gather)
297 {
298 struct brw_context *brw = brw_context(ctx);
299 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
300 struct intel_texture_object *intelObj = intel_texture_object(tObj);
301 struct intel_mipmap_tree *mt = intelObj->mt;
302 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
303 uint32_t *surf;
304
305 /* BRW_NEW_TEXTURE_BUFFER */
306 if (tObj->Target == GL_TEXTURE_BUFFER) {
307 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
308 return;
309 }
310
311 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
312 6 * 4, 32, surf_offset);
313
314 uint32_t tex_format = translate_tex_format(brw, mt->format,
315 sampler->sRGBDecode);
316
317 if (for_gather) {
318 /* Sandybridge's gather4 message is broken for integer formats.
319 * To work around this, we pretend the surface is UNORM for
320 * 8 or 16-bit formats, and emit shader instructions to recover
321 * the real INT/UINT value. For 32-bit formats, we pretend
322 * the surface is FLOAT, and simply reinterpret the resulting
323 * bits.
324 */
325 switch (tex_format) {
326 case BRW_SURFACEFORMAT_R8_SINT:
327 case BRW_SURFACEFORMAT_R8_UINT:
328 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
329 break;
330
331 case BRW_SURFACEFORMAT_R16_SINT:
332 case BRW_SURFACEFORMAT_R16_UINT:
333 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
334 break;
335
336 case BRW_SURFACEFORMAT_R32_SINT:
337 case BRW_SURFACEFORMAT_R32_UINT:
338 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
339 break;
340
341 default:
342 break;
343 }
344 }
345
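/* The six SURFACE_STATE dwords below are: [0] type/format, [1] base
 * address, [2] width/height/LOD, [3] pitch/depth/tiling, [4] multisample
 * count/min LOD, and [5] X/Y offset plus vertical alignment.
 */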
346 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
347 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
348 BRW_SURFACE_CUBEFACE_ENABLES |
349 tex_format << BRW_SURFACE_FORMAT_SHIFT);
350
351 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
352
353 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
354 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
355 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
356
357 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
358 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
359 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
360
361 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
362 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
363
364 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
365
366 /* Emit relocation to surface contents */
367 drm_intel_bo_emit_reloc(brw->batch.bo,
368 *surf_offset + 4,
369 mt->bo,
370 surf[1] - mt->bo->offset64,
371 I915_GEM_DOMAIN_SAMPLER, 0);
372 }
373
374 /**
375 * Create the constant buffer surface. Vertex/fragment shader constants will be
376 * read from this buffer with Data Port Read instructions/messages.
377 */
378 void
379 brw_create_constant_surface(struct brw_context *brw,
380 drm_intel_bo *bo,
381 uint32_t offset,
382 uint32_t size,
383 uint32_t *out_offset,
384 bool dword_pitch)
385 {
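/* dword_pitch selects a 4-byte element stride instead of the usual 16-byte
 * vec4 stride; the surface format stays R32G32B32A32_FLOAT either way, only
 * the addressing granularity changes.
 */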
386 uint32_t stride = dword_pitch ? 4 : 16;
387 uint32_t elements = ALIGN(size, stride) / stride;
388
389 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
390 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
391 elements, stride, false);
392 }
393
394 /**
395 * Set up a binding table entry for use by stream output logic (transform
396 * feedback).
397 *
398 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
399 */
400 void
401 brw_update_sol_surface(struct brw_context *brw,
402 struct gl_buffer_object *buffer_obj,
403 uint32_t *out_offset, unsigned num_vector_components,
404 unsigned stride_dwords, unsigned offset_dwords)
405 {
406 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
407 uint32_t offset_bytes = 4 * offset_dwords;
408 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
409 offset_bytes,
410 buffer_obj->Size - offset_bytes);
411 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
412 out_offset);
413 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
414 size_t size_dwords = buffer_obj->Size / 4;
415 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
416
417 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
418 * too big to map using a single binding table entry?
419 */
420 assert((size_dwords - offset_dwords) / stride_dwords
421 <= BRW_MAX_NUM_BUFFER_ENTRIES);
422
423 if (size_dwords > offset_dwords + num_vector_components) {
424 /* There is room for at least 1 transform feedback output in the buffer.
425 * Compute the number of additional transform feedback outputs the
426 * buffer has room for.
427 */
428 buffer_size_minus_1 =
429 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
430 } else {
431 /* There isn't even room for a single transform feedback output in the
432 * buffer. We can't configure the binding table entry to prevent output
433 * entirely; we'll have to rely on the geometry shader to detect
434 * overflow. But to minimize the damage in case of a bug, set up the
435 * binding table entry to just allow a single output.
436 */
437 buffer_size_minus_1 = 0;
438 }
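/* buffer_size_minus_1 (a count of additional transform feedback outputs) is
 * spread across the Width (bits 6:0), Height (bits 19:7) and Depth
 * (bits 26:20) surface fields, just like an ordinary buffer surface.
 */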
439 width = buffer_size_minus_1 & 0x7f;
440 height = (buffer_size_minus_1 & 0xfff80) >> 7;
441 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
442
443 switch (num_vector_components) {
444 case 1:
445 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
446 break;
447 case 2:
448 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
449 break;
450 case 3:
451 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
452 break;
453 case 4:
454 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
455 break;
456 default:
457 unreachable("Invalid vector size for transform feedback output");
458 }
459
460 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
461 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
462 surface_format << BRW_SURFACE_FORMAT_SHIFT |
463 BRW_SURFACE_RC_READ_WRITE;
464 surf[1] = bo->offset64 + offset_bytes; /* reloc */
465 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
466 height << BRW_SURFACE_HEIGHT_SHIFT);
467 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
468 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
469 surf[4] = 0;
470 surf[5] = 0;
471
472 /* Emit relocation to surface contents. */
473 drm_intel_bo_emit_reloc(brw->batch.bo,
474 *out_offset + 4,
475 bo, offset_bytes,
476 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
477 }
478
479 /* Creates a new WM constant buffer reflecting the current fragment program's
480 * constants, if needed by the fragment program.
481 *
482 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
483 * state atom.
484 */
485 static void
486 brw_upload_wm_pull_constants(struct brw_context *brw)
487 {
488 struct brw_stage_state *stage_state = &brw->wm.base;
489 /* BRW_NEW_FRAGMENT_PROGRAM */
490 struct brw_fragment_program *fp =
491 (struct brw_fragment_program *) brw->fragment_program;
492 /* BRW_NEW_FS_PROG_DATA */
493 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
494
495 /* _NEW_PROGRAM_CONSTANTS */
496 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
497 stage_state, prog_data, true);
498 }
499
500 const struct brw_tracked_state brw_wm_pull_constants = {
501 .dirty = {
502 .mesa = _NEW_PROGRAM_CONSTANTS,
503 .brw = BRW_NEW_BATCH |
504 BRW_NEW_FRAGMENT_PROGRAM |
505 BRW_NEW_FS_PROG_DATA,
506 },
507 .emit = brw_upload_wm_pull_constants,
508 };
509
510 /**
511 * Creates a null renderbuffer surface.
512 *
513 * This is used when the shader doesn't write to any color output. An FB
514 * write to target 0 will still be emitted, because that's how the thread is
515 * terminated (and computed depth is returned), so we need to have the
516 * hardware discard the target 0 color output.
517 */
518 static void
519 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
520 {
521 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
522 * Notes):
523 *
524 * A null surface will be used in instances where an actual surface is
525 * not bound. When a write message is generated to a null surface, no
526 * actual surface is written to. When a read message (including any
527 * sampling engine message) is generated to a null surface, the result
528 * is all zeros. Note that a null surface type is allowed to be used
529 * with all messages, even if it is not specifically indicated as
530 * supported. All of the remaining fields in surface state are ignored
531 * for null surfaces, with the following exceptions:
532 *
533 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
534 * depth buffer’s corresponding state for all render target surfaces,
535 * including null.
536 *
537 * - Surface Format must be R8G8B8A8_UNORM.
538 */
539 struct gl_context *ctx = &brw->ctx;
540 uint32_t *surf;
541 unsigned surface_type = BRW_SURFACE_NULL;
542 drm_intel_bo *bo = NULL;
543 unsigned pitch_minus_1 = 0;
544 uint32_t multisampling_state = 0;
545 /* BRW_NEW_FS_PROG_DATA */
546 uint32_t surf_index =
547 brw->wm.prog_data->binding_table.render_target_start + unit;
548
549 /* _NEW_BUFFERS */
550 const struct gl_framebuffer *fb = ctx->DrawBuffer;
551
552 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
553 &brw->wm.base.surf_offset[surf_index]);
554
555 if (fb->Visual.samples > 1) {
556 /* On Gen6, null render targets seem to cause GPU hangs when
557 * multisampling. So work around this problem by rendering into dummy
558 * color buffer.
559 *
560 * To decrease the amount of memory needed by the workaround buffer, we
561 * set its pitch to 128 bytes (the width of a Y tile). This means that
562 * the amount of memory needed for the workaround buffer is
563 * (width_in_tiles + height_in_tiles - 1) tiles.
564 *
565 * Note that since the workaround buffer will be interpreted by the
566 * hardware as an interleaved multisampled buffer, we need to compute
567 * width_in_tiles and height_in_tiles by dividing the width and height
568 * by 16 rather than the normal Y-tile size of 32.
569 */
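/* For example (hypothetical numbers): a 1920x1080 multisampled framebuffer
 * gives width_in_tiles = 120 and height_in_tiles = 68, so the workaround
 * buffer needs (120 + 68 - 1) * 4096 bytes, roughly 748 KiB.
 */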
570 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
571 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
572 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
573 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
574 size_needed);
575 bo = brw->wm.multisampled_null_render_target_bo;
576 surface_type = BRW_SURFACE_2D;
577 pitch_minus_1 = 127;
578 multisampling_state =
579 brw_get_surface_num_multisamples(fb->Visual.samples);
580 }
581
582 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
583 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
584 if (brw->gen < 6) {
585 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
586 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
587 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
588 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
589 }
590 surf[1] = bo ? bo->offset64 : 0;
591 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
592 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
593
594 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
595 * Notes):
596 *
597 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
598 */
599 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
600 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
601 surf[4] = multisampling_state;
602 surf[5] = 0;
603
604 if (bo) {
605 drm_intel_bo_emit_reloc(brw->batch.bo,
606 brw->wm.base.surf_offset[surf_index] + 4,
607 bo, 0,
608 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
609 }
610 }
611
612 /**
613 * Sets up a surface state structure to point at the given region.
614 * While it is only used for the front/back buffer currently, it should be
615 * usable for further buffers when doing ARB_draw_buffers support.
616 */
617 static void
618 brw_update_renderbuffer_surface(struct brw_context *brw,
619 struct gl_renderbuffer *rb,
620 bool layered,
621 unsigned int unit)
622 {
623 struct gl_context *ctx = &brw->ctx;
624 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
625 struct intel_mipmap_tree *mt = irb->mt;
626 uint32_t *surf;
627 uint32_t tile_x, tile_y;
628 uint32_t format = 0;
629 /* _NEW_BUFFERS */
630 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
631 /* BRW_NEW_FS_PROG_DATA */
632 uint32_t surf_index =
633 brw->wm.prog_data->binding_table.render_target_start + unit;
634
635 assert(!layered);
636
637 if (rb->TexImage && !brw->has_surface_tile_offset) {
638 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
639
640 if (tile_x != 0 || tile_y != 0) {
641 /* Original gen4 hardware couldn't draw to a non-tile-aligned
642 * destination in a miptree unless you actually set up your renderbuffer
643 * as a miptree and used the fragile lod/array_index/etc. controls to
644 * select the image. So, instead, we just make a new single-level
645 * miptree and render into that.
646 */
647 intel_renderbuffer_move_to_temp(brw, irb, false);
648 mt = irb->mt;
649 }
650 }
651
652 intel_miptree_used_for_rendering(irb->mt);
653
654 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
655 &brw->wm.base.surf_offset[surf_index]);
656
657 format = brw->render_target_format[rb_format];
658 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
659 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
660 __FUNCTION__, _mesa_get_format_name(rb_format));
661 }
662
663 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
664 format << BRW_SURFACE_FORMAT_SHIFT);
665
666 /* reloc */
667 assert(mt->offset % mt->cpp == 0);
668 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
669 mt->bo->offset64 + mt->offset);
670
671 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
672 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
673
674 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
675 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
676
677 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
678
679 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
680 /* Note that the low bits of these fields are missing, so
681 * there's the possibility of getting in trouble.
682 */
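/* The surface X offset field is expressed in units of 4 pixels and the
 * Y offset in units of 2 rows, hence the divisions (and the alignment
 * asserts) below.
 */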
683 assert(tile_x % 4 == 0);
684 assert(tile_y % 2 == 0);
685 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
686 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
687 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
688
689 if (brw->gen < 6) {
690 /* _NEW_COLOR */
691 if (!ctx->Color.ColorLogicOpEnabled &&
692 (ctx->Color.BlendEnabled & (1 << unit)))
693 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
694
695 if (!ctx->Color.ColorMask[unit][0])
696 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
697 if (!ctx->Color.ColorMask[unit][1])
698 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
699 if (!ctx->Color.ColorMask[unit][2])
700 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
701
702 /* Disable writes to the alpha component when the renderbuffer is XRGB
703 * (has no alpha bits) or alpha writes are masked off.
704 */
705 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
706 !ctx->Color.ColorMask[unit][3]) {
707 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
708 }
709 }
710
711 drm_intel_bo_emit_reloc(brw->batch.bo,
712 brw->wm.base.surf_offset[surf_index] + 4,
713 mt->bo,
714 surf[1] - mt->bo->offset64,
715 I915_GEM_DOMAIN_RENDER,
716 I915_GEM_DOMAIN_RENDER);
717 }
718
719 /**
720 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
721 */
722 static void
723 brw_update_renderbuffer_surfaces(struct brw_context *brw)
724 {
725 struct gl_context *ctx = &brw->ctx;
726 GLuint i;
727
728 /* _NEW_BUFFERS | _NEW_COLOR */
729 /* Update surfaces for drawing buffers */
730 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
731 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
732 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
733 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
734 ctx->DrawBuffer->MaxNumLayers > 0, i);
735 } else {
736 brw->vtbl.update_null_renderbuffer_surface(brw, i);
737 }
738 }
739 } else {
740 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
741 }
742 brw->state.dirty.brw |= BRW_NEW_SURFACES;
743 }
744
745 const struct brw_tracked_state brw_renderbuffer_surfaces = {
746 .dirty = {
747 .mesa = _NEW_BUFFERS |
748 _NEW_COLOR,
749 .brw = BRW_NEW_BATCH |
750 BRW_NEW_FS_PROG_DATA,
751 },
752 .emit = brw_update_renderbuffer_surfaces,
753 };
754
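/* On gen6+ the blend enables and color write masks live in CC/BLEND state
 * rather than in SURFACE_STATE, so this variant does not need to listen
 * to _NEW_COLOR.
 */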
755 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
756 .dirty = {
757 .mesa = _NEW_BUFFERS,
758 .brw = BRW_NEW_BATCH,
759 },
760 .emit = brw_update_renderbuffer_surfaces,
761 };
762
763
764 static void
765 update_stage_texture_surfaces(struct brw_context *brw,
766 const struct gl_program *prog,
767 struct brw_stage_state *stage_state,
768 bool for_gather)
769 {
770 if (!prog)
771 return;
772
773 struct gl_context *ctx = &brw->ctx;
774
775 uint32_t *surf_offset = stage_state->surf_offset;
776
777 /* BRW_NEW_*_PROG_DATA */
778 if (for_gather)
779 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
780 else
781 surf_offset += stage_state->prog_data->binding_table.texture_start;
782
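/* _mesa_fls() returns one plus the index of the highest bit set (0 if no
 * bits are set), i.e. one past the last sampler in use, so the loop below
 * also clears surf_offset entries for any unused samplers below that.
 */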
783 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
784 for (unsigned s = 0; s < num_samplers; s++) {
785 surf_offset[s] = 0;
786
787 if (prog->SamplersUsed & (1 << s)) {
788 const unsigned unit = prog->SamplerUnits[s];
789
790 /* _NEW_TEXTURE */
791 if (ctx->Texture.Unit[unit]._Current) {
792 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
793 }
794 }
795 }
796 }
797
798
799 /**
800 * Construct SURFACE_STATE objects for enabled textures.
801 */
802 static void
803 brw_update_texture_surfaces(struct brw_context *brw)
804 {
805 /* BRW_NEW_VERTEX_PROGRAM */
806 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
807
808 /* BRW_NEW_GEOMETRY_PROGRAM */
809 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
810
811 /* BRW_NEW_FRAGMENT_PROGRAM */
812 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
813
814 /* _NEW_TEXTURE */
815 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
816 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
817 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
818
819 /* Emit an alternate set of surface state for gather. This
820 * allows the surface format to be overridden for only the
821 * gather4 messages. */
822 if (brw->gen < 8) {
823 if (vs && vs->UsesGather)
824 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
825 if (gs && gs->UsesGather)
826 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
827 if (fs && fs->UsesGather)
828 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
829 }
830
831 brw->state.dirty.brw |= BRW_NEW_SURFACES;
832 }
833
834 const struct brw_tracked_state brw_texture_surfaces = {
835 .dirty = {
836 .mesa = _NEW_TEXTURE,
837 .brw = BRW_NEW_BATCH |
838 BRW_NEW_FRAGMENT_PROGRAM |
839 BRW_NEW_FS_PROG_DATA |
840 BRW_NEW_GEOMETRY_PROGRAM |
841 BRW_NEW_GS_PROG_DATA |
842 BRW_NEW_TEXTURE_BUFFER |
843 BRW_NEW_VERTEX_PROGRAM |
844 BRW_NEW_VS_PROG_DATA,
845 },
846 .emit = brw_update_texture_surfaces,
847 };
848
849 void
850 brw_upload_ubo_surfaces(struct brw_context *brw,
851 struct gl_shader *shader,
852 struct brw_stage_state *stage_state,
853 struct brw_stage_prog_data *prog_data,
854 bool dword_pitch)
855 {
856 struct gl_context *ctx = &brw->ctx;
857
858 if (!shader)
859 return;
860
861 uint32_t *surf_offsets =
862 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
863
864 for (int i = 0; i < shader->NumUniformBlocks; i++) {
865 struct gl_uniform_buffer_binding *binding;
866 struct intel_buffer_object *intel_bo;
867
868 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
869 intel_bo = intel_buffer_object(binding->BufferObject);
870 drm_intel_bo *bo =
871 intel_bufferobj_buffer(brw, intel_bo,
872 binding->Offset,
873 binding->BufferObject->Size - binding->Offset);
874
875 /* Because behavior for referencing outside of the binding's size in the
876 * glBindBufferRange case is undefined, we can just bind the whole buffer
877 * glBindBufferBase wants and be a correct implementation.
878 */
879 brw_create_constant_surface(brw, bo, binding->Offset,
880 bo->size - binding->Offset,
881 &surf_offsets[i],
882 dword_pitch);
883 }
884
885 if (shader->NumUniformBlocks)
886 brw->state.dirty.brw |= BRW_NEW_SURFACES;
887 }
888
889 static void
890 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
891 {
892 struct gl_context *ctx = &brw->ctx;
893 /* _NEW_PROGRAM */
894 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
895
896 if (!prog)
897 return;
898
899 /* BRW_NEW_FS_PROG_DATA */
900 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
901 &brw->wm.base, &brw->wm.prog_data->base, true);
902 }
903
904 const struct brw_tracked_state brw_wm_ubo_surfaces = {
905 .dirty = {
906 .mesa = _NEW_PROGRAM,
907 .brw = BRW_NEW_BATCH |
908 BRW_NEW_FS_PROG_DATA |
909 BRW_NEW_UNIFORM_BUFFER,
910 },
911 .emit = brw_upload_wm_ubo_surfaces,
912 };
913
914 void
915 brw_upload_abo_surfaces(struct brw_context *brw,
916 struct gl_shader_program *prog,
917 struct brw_stage_state *stage_state,
918 struct brw_stage_prog_data *prog_data)
919 {
920 struct gl_context *ctx = &brw->ctx;
921 uint32_t *surf_offsets =
922 &stage_state->surf_offset[prog_data->binding_table.abo_start];
923
924 for (int i = 0; i < prog->NumAtomicBuffers; i++) {
925 struct gl_atomic_buffer_binding *binding =
926 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
927 struct intel_buffer_object *intel_bo =
928 intel_buffer_object(binding->BufferObject);
929 drm_intel_bo *bo = intel_bufferobj_buffer(
930 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
931
932 brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
933 bo->size - binding->Offset,
934 &surf_offsets[i], true);
935 }
936
937 if (prog->NumAtomicBuffers)
938 brw->state.dirty.brw |= BRW_NEW_SURFACES;
939 }
940
941 static void
942 brw_upload_wm_abo_surfaces(struct brw_context *brw)
943 {
944 struct gl_context *ctx = &brw->ctx;
945 /* _NEW_PROGRAM */
946 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
947
948 if (prog) {
949 /* BRW_NEW_FS_PROG_DATA */
950 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
951 &brw->wm.prog_data->base);
952 }
953 }
954
955 const struct brw_tracked_state brw_wm_abo_surfaces = {
956 .dirty = {
957 .mesa = _NEW_PROGRAM,
958 .brw = BRW_NEW_ATOMIC_BUFFER |
959 BRW_NEW_BATCH |
960 BRW_NEW_FS_PROG_DATA,
961 },
962 .emit = brw_upload_wm_abo_surfaces,
963 };
964
965 void
966 gen4_init_vtable_surface_functions(struct brw_context *brw)
967 {
968 brw->vtbl.update_texture_surface = brw_update_texture_surface;
969 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
970 brw->vtbl.update_null_renderbuffer_surface =
971 brw_update_null_renderbuffer_surface;
972 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
973 }