i965: Move intel_context::batch to brw_context.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169 * force the alpha value read to 1.0. This allows for the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
192
193
194 static void
195 brw_update_buffer_texture_surface(struct gl_context *ctx,
196 unsigned unit,
197 uint32_t *binding_table,
198 unsigned surf_index)
199 {
200 struct brw_context *brw = brw_context(ctx);
201 struct intel_context *intel = &brw->intel;
202 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
203 uint32_t *surf;
204 struct intel_buffer_object *intel_obj =
205 intel_buffer_object(tObj->BufferObject);
206 drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
207 gl_format format = tObj->_BufferObjectFormat;
208 uint32_t brw_format = brw_format_for_mesa_format(format);
209 int texel_size = _mesa_get_format_bytes(format);
210
211 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
212 _mesa_problem(NULL, "bad format %s for texture buffer\n",
213 _mesa_get_format_name(format));
214 }
215
216 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
217 6 * 4, 32, &binding_table[surf_index]);
218
219 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
220 (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT));
221
222 if (intel->gen >= 6)
223 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
224
225 if (bo) {
226 surf[1] = bo->offset; /* reloc */
227
228 /* Emit relocation to surface contents. */
229 drm_intel_bo_emit_reloc(brw->batch.bo,
230 binding_table[surf_index] + 4,
231 bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
232
233 int w = intel_obj->Base.Size / texel_size;
234 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
235 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
236 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
237 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
238 } else {
239 surf[1] = 0;
240 surf[2] = 0;
241 surf[3] = 0;
242 }
243
244 surf[4] = 0;
245 surf[5] = 0;
246 }
247
248 static void
249 brw_update_texture_surface(struct gl_context *ctx,
250 unsigned unit,
251 uint32_t *binding_table,
252 unsigned surf_index)
253 {
254 struct brw_context *brw = brw_context(ctx);
255 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
256 struct intel_texture_object *intelObj = intel_texture_object(tObj);
257 struct intel_mipmap_tree *mt = intelObj->mt;
258 struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
259 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
260 uint32_t *surf;
261 uint32_t tile_x, tile_y;
262
263 if (tObj->Target == GL_TEXTURE_BUFFER) {
264 brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
265 return;
266 }
267
268 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
269 6 * 4, 32, &binding_table[surf_index]);
270
271 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
272 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
273 BRW_SURFACE_CUBEFACE_ENABLES |
274 (translate_tex_format(brw,
275 mt->format,
276 tObj->DepthMode,
277 sampler->sRGBDecode) <<
278 BRW_SURFACE_FORMAT_SHIFT));
279
280 surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
281 surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
282 &tile_x, &tile_y);
283
284 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
285 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
286 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
287
288 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
289 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
290 (intelObj->mt->region->pitch - 1) <<
291 BRW_SURFACE_PITCH_SHIFT);
292
293 surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);
294
295 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
296 /* Note that the low bits of these fields are missing, so
297 * there's the possibility of getting in trouble.
298 */
299 assert(tile_x % 4 == 0);
300 assert(tile_y % 2 == 0);
301 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
302 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
303 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
304
305 /* Emit relocation to surface contents */
306 drm_intel_bo_emit_reloc(brw->batch.bo,
307 binding_table[surf_index] + 4,
308 intelObj->mt->region->bo,
309 surf[1] - intelObj->mt->region->bo->offset,
310 I915_GEM_DOMAIN_SAMPLER, 0);
311 }
312
313 /**
314 * Create the constant buffer surface. Vertex/fragment shader constants will be
315 * read from this buffer with Data Port Read instructions/messages.
316 */
317 static void
318 brw_create_constant_surface(struct brw_context *brw,
319 drm_intel_bo *bo,
320 uint32_t offset,
321 uint32_t size,
322 uint32_t *out_offset,
323 bool dword_pitch)
324 {
325 struct intel_context *intel = &brw->intel;
326 uint32_t stride = dword_pitch ? 4 : 16;
327 uint32_t elements = ALIGN(size, stride) / stride;
328 const GLint w = elements - 1;
329 uint32_t *surf;
330
331 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
332 6 * 4, 32, out_offset);
333
334 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
335 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
336 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
337
338 if (intel->gen >= 6)
339 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
340
341 surf[1] = bo->offset + offset; /* reloc */
342
343 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
344 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
345
346 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
347 (stride - 1) << BRW_SURFACE_PITCH_SHIFT);
348
349 surf[4] = 0;
350 surf[5] = 0;
351
352 /* Emit relocation to surface contents. Section 5.1.1 of the gen4
353 * bspec ("Data Cache") says that the data cache does not exist as
354 * a separate cache and is just the sampler cache.
355 */
356 drm_intel_bo_emit_reloc(brw->batch.bo,
357 *out_offset + 4,
358 bo, offset,
359 I915_GEM_DOMAIN_SAMPLER, 0);
360 }
361
362 /**
363 * Set up a binding table entry for use by stream output logic (transform
364 * feedback).
365 *
366 * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
367 */
368 void
369 brw_update_sol_surface(struct brw_context *brw,
370 struct gl_buffer_object *buffer_obj,
371 uint32_t *out_offset, unsigned num_vector_components,
372 unsigned stride_dwords, unsigned offset_dwords)
373 {
374 struct intel_context *intel = &brw->intel;
375 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
376 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
377 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
378 out_offset);
379 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
380 uint32_t offset_bytes = 4 * offset_dwords;
381 size_t size_dwords = buffer_obj->Size / 4;
382 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
383
384 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
385 * too big to map using a single binding table entry?
386 */
387 assert((size_dwords - offset_dwords) / stride_dwords
388 <= BRW_MAX_NUM_BUFFER_ENTRIES);
389
390 if (size_dwords > offset_dwords + num_vector_components) {
391 /* There is room for at least 1 transform feedback output in the buffer.
392 * Compute the number of additional transform feedback outputs the
393 * buffer has room for.
394 */
395 buffer_size_minus_1 =
396 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
397 } else {
398 /* There isn't even room for a single transform feedback output in the
399 * buffer. We can't configure the binding table entry to prevent output
400 * entirely; we'll have to rely on the geometry shader to detect
401 * overflow. But to minimize the damage in case of a bug, set up the
402 * binding table entry to just allow a single output.
403 */
404 buffer_size_minus_1 = 0;
405 }
406 width = buffer_size_minus_1 & 0x7f;
407 height = (buffer_size_minus_1 & 0xfff80) >> 7;
408 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
409
410 switch (num_vector_components) {
411 case 1:
412 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
413 break;
414 case 2:
415 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
416 break;
417 case 3:
418 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
419 break;
420 case 4:
421 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
422 break;
423 default:
424 assert(!"Invalid vector size for transform feedback output");
425 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
426 break;
427 }
428
429 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
430 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
431 surface_format << BRW_SURFACE_FORMAT_SHIFT |
432 BRW_SURFACE_RC_READ_WRITE;
433 surf[1] = bo->offset + offset_bytes; /* reloc */
434 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
435 height << BRW_SURFACE_HEIGHT_SHIFT);
436 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
437 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
438 surf[4] = 0;
439 surf[5] = 0;
440
441 /* Emit relocation to surface contents. */
442 drm_intel_bo_emit_reloc(brw->batch.bo,
443 *out_offset + 4,
444 bo, offset_bytes,
445 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
446 }
447
448 /* Creates a new WM constant buffer reflecting the current fragment program's
449 * constants, if needed by the fragment program.
450 *
451 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
452 * state atom.
453 */
454 static void
455 brw_upload_wm_pull_constants(struct brw_context *brw)
456 {
457 struct gl_context *ctx = &brw->intel.ctx;
458 struct intel_context *intel = &brw->intel;
459 /* BRW_NEW_FRAGMENT_PROGRAM */
460 struct brw_fragment_program *fp =
461 (struct brw_fragment_program *) brw->fragment_program;
462 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
463 const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
464 const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
465 float *constants;
466 unsigned int i;
467
468 _mesa_load_state_parameters(ctx, params);
469
470 /* CACHE_NEW_WM_PROG */
471 if (brw->wm.prog_data->nr_pull_params == 0) {
472 if (brw->wm.const_bo) {
473 drm_intel_bo_unreference(brw->wm.const_bo);
474 brw->wm.const_bo = NULL;
475 brw->wm.surf_offset[surf_index] = 0;
476 brw->state.dirty.brw |= BRW_NEW_SURFACES;
477 }
478 return;
479 }
480
481 drm_intel_bo_unreference(brw->wm.const_bo);
482 brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
483 size, 64);
484
485 /* _NEW_PROGRAM_CONSTANTS */
486 drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
487 constants = brw->wm.const_bo->virtual;
488 for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
489 constants[i] = *brw->wm.prog_data->pull_param[i];
490 }
491 drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
492
493 brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
494 &brw->wm.surf_offset[surf_index],
495 true);
496
497 brw->state.dirty.brw |= BRW_NEW_SURFACES;
498 }
499
500 const struct brw_tracked_state brw_wm_pull_constants = {
501 .dirty = {
502 .mesa = (_NEW_PROGRAM_CONSTANTS),
503 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
504 .cache = CACHE_NEW_WM_PROG,
505 },
506 .emit = brw_upload_wm_pull_constants,
507 };
508
509 static void
510 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
511 {
512 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
513 * Notes):
514 *
515 * A null surface will be used in instances where an actual surface is
516 * not bound. When a write message is generated to a null surface, no
517 * actual surface is written to. When a read message (including any
518 * sampling engine message) is generated to a null surface, the result
519 * is all zeros. Note that a null surface type is allowed to be used
520 * with all messages, even if it is not specificially indicated as
521 * supported. All of the remaining fields in surface state are ignored
522 * for null surfaces, with the following exceptions:
523 *
524 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
525 * depth buffer’s corresponding state for all render target surfaces,
526 * including null.
527 *
528 * - Surface Format must be R8G8B8A8_UNORM.
529 */
530 struct intel_context *intel = &brw->intel;
531 struct gl_context *ctx = &intel->ctx;
532 uint32_t *surf;
533 unsigned surface_type = BRW_SURFACE_NULL;
534 drm_intel_bo *bo = NULL;
535 unsigned pitch_minus_1 = 0;
536 uint32_t multisampling_state = 0;
537
538 /* _NEW_BUFFERS */
539 const struct gl_framebuffer *fb = ctx->DrawBuffer;
540
541 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
542 6 * 4, 32, &brw->wm.surf_offset[unit]);
543
544 if (fb->Visual.samples > 1) {
545 /* On Gen6, null render targets seem to cause GPU hangs when
546 * multisampling. So work around this problem by rendering into dummy
547 * color buffer.
548 *
549 * To decrease the amount of memory needed by the workaround buffer, we
550 * set its pitch to 128 bytes (the width of a Y tile). This means that
551 * the amount of memory needed for the workaround buffer is
552 * (width_in_tiles + height_in_tiles - 1) tiles.
553 *
554 * Note that since the workaround buffer will be interpreted by the
555 * hardware as an interleaved multisampled buffer, we need to compute
556 * width_in_tiles and height_in_tiles by dividing the width and height
557 * by 16 rather than the normal Y-tile size of 32.
558 */
559 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
560 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
561 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
562 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
563 size_needed);
564 bo = brw->wm.multisampled_null_render_target_bo;
565 surface_type = BRW_SURFACE_2D;
566 pitch_minus_1 = 127;
567 multisampling_state =
568 brw_get_surface_num_multisamples(fb->Visual.samples);
569 }
570
571 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
572 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
573 if (intel->gen < 6) {
574 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
575 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
576 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
577 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
578 }
579 surf[1] = bo ? bo->offset : 0;
580 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
581 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
582
583 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
584 * Notes):
585 *
586 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
587 */
588 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
589 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
590 surf[4] = multisampling_state;
591 surf[5] = 0;
592
593 if (bo) {
594 drm_intel_bo_emit_reloc(brw->batch.bo,
595 brw->wm.surf_offset[unit] + 4,
596 bo, 0,
597 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
598 }
599 }
600
601 /**
602 * Sets up a surface state structure to point at the given region.
603 * While it is only used for the front/back buffer currently, it should be
604 * usable for further buffers when doing ARB_draw_buffer support.
605 */
606 static void
607 brw_update_renderbuffer_surface(struct brw_context *brw,
608 struct gl_renderbuffer *rb,
609 bool layered,
610 unsigned int unit)
611 {
612 struct intel_context *intel = &brw->intel;
613 struct gl_context *ctx = &intel->ctx;
614 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
615 struct intel_mipmap_tree *mt = irb->mt;
616 struct intel_region *region;
617 uint32_t *surf;
618 uint32_t tile_x, tile_y;
619 uint32_t format = 0;
620 /* _NEW_BUFFERS */
621 gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
622
623 assert(!layered);
624
625 if (rb->TexImage && !brw->has_surface_tile_offset) {
626 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
627
628 if (tile_x != 0 || tile_y != 0) {
629 /* Original gen4 hardware couldn't draw to a non-tile-aligned
630 * destination in a miptree unless you actually setup your renderbuffer
631 * as a miptree and used the fragile lod/array_index/etc. controls to
632 * select the image. So, instead, we just make a new single-level
633 * miptree and render into that.
634 */
635 intel_renderbuffer_move_to_temp(brw, irb, false);
636 mt = irb->mt;
637 }
638 }
639
640 intel_miptree_used_for_rendering(irb->mt);
641
642 region = irb->mt->region;
643
644 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
645 6 * 4, 32, &brw->wm.surf_offset[unit]);
646
647 format = brw->render_target_format[rb_format];
648 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
649 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
650 __FUNCTION__, _mesa_get_format_name(rb_format));
651 }
652
653 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
654 format << BRW_SURFACE_FORMAT_SHIFT);
655
656 /* reloc */
657 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
658 region->bo->offset);
659
660 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
661 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
662
663 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
664 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
665
666 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
667
668 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
669 /* Note that the low bits of these fields are missing, so
670 * there's the possibility of getting in trouble.
671 */
672 assert(tile_x % 4 == 0);
673 assert(tile_y % 2 == 0);
674 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
675 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
676 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
677
678 if (intel->gen < 6) {
679 /* _NEW_COLOR */
680 if (!ctx->Color.ColorLogicOpEnabled &&
681 (ctx->Color.BlendEnabled & (1 << unit)))
682 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
683
684 if (!ctx->Color.ColorMask[unit][0])
685 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
686 if (!ctx->Color.ColorMask[unit][1])
687 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
688 if (!ctx->Color.ColorMask[unit][2])
689 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
690
691 /* As mentioned above, disable writes to the alpha component when the
692 * renderbuffer is XRGB.
693 */
694 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
695 !ctx->Color.ColorMask[unit][3]) {
696 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
697 }
698 }
699
700 drm_intel_bo_emit_reloc(brw->batch.bo,
701 brw->wm.surf_offset[unit] + 4,
702 region->bo,
703 surf[1] - region->bo->offset,
704 I915_GEM_DOMAIN_RENDER,
705 I915_GEM_DOMAIN_RENDER);
706 }
707
708 /**
709 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
710 */
711 static void
712 brw_update_renderbuffer_surfaces(struct brw_context *brw)
713 {
714 struct gl_context *ctx = &brw->intel.ctx;
715 GLuint i;
716
717 /* _NEW_BUFFERS | _NEW_COLOR */
718 /* Update surfaces for drawing buffers */
719 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
720 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
721 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
722 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
723 ctx->DrawBuffer->Layered, i);
724 } else {
725 brw->vtbl.update_null_renderbuffer_surface(brw, i);
726 }
727 }
728 } else {
729 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
730 }
731 brw->state.dirty.brw |= BRW_NEW_SURFACES;
732 }
733
734 const struct brw_tracked_state brw_renderbuffer_surfaces = {
735 .dirty = {
736 .mesa = (_NEW_COLOR |
737 _NEW_BUFFERS),
738 .brw = BRW_NEW_BATCH,
739 .cache = 0
740 },
741 .emit = brw_update_renderbuffer_surfaces,
742 };
743
744 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
745 .dirty = {
746 .mesa = _NEW_BUFFERS,
747 .brw = BRW_NEW_BATCH,
748 .cache = 0
749 },
750 .emit = brw_update_renderbuffer_surfaces,
751 };
752
753 /**
754 * Construct SURFACE_STATE objects for enabled textures.
755 */
756 static void
757 brw_update_texture_surfaces(struct brw_context *brw)
758 {
759 struct intel_context *intel = &brw->intel;
760 struct gl_context *ctx = &intel->ctx;
761
762 /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
763 * Unfortunately, we're stuck using the gl_program structs until the
764 * ARB_fragment_program front-end gets converted to GLSL IR. These
765 * have the downside that SamplerUnits is split and only contains the
766 * mappings for samplers active in that stage.
767 */
768 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
769 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
770
771 unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);
772
773 for (unsigned s = 0; s < num_samplers; s++) {
774 brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
775 brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;
776
777 if (vs->SamplersUsed & (1 << s)) {
778 const unsigned unit = vs->SamplerUnits[s];
779
780 /* _NEW_TEXTURE */
781 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
782 brw->vtbl.update_texture_surface(ctx, unit,
783 brw->vs.surf_offset,
784 SURF_INDEX_VS_TEXTURE(s));
785 }
786 }
787
788 if (fs->SamplersUsed & (1 << s)) {
789 const unsigned unit = fs->SamplerUnits[s];
790
791 /* _NEW_TEXTURE */
792 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
793 brw->vtbl.update_texture_surface(ctx, unit,
794 brw->wm.surf_offset,
795 SURF_INDEX_TEXTURE(s));
796 }
797 }
798 }
799
800 brw->state.dirty.brw |= BRW_NEW_SURFACES;
801 }
802
803 const struct brw_tracked_state brw_texture_surfaces = {
804 .dirty = {
805 .mesa = _NEW_TEXTURE,
806 .brw = BRW_NEW_BATCH |
807 BRW_NEW_VERTEX_PROGRAM |
808 BRW_NEW_FRAGMENT_PROGRAM,
809 .cache = 0
810 },
811 .emit = brw_update_texture_surfaces,
812 };
813
814 void
815 brw_upload_ubo_surfaces(struct brw_context *brw,
816 struct gl_shader *shader,
817 uint32_t *surf_offsets)
818 {
819 struct gl_context *ctx = &brw->intel.ctx;
820
821 if (!shader)
822 return;
823
824 for (int i = 0; i < shader->NumUniformBlocks; i++) {
825 struct gl_uniform_buffer_binding *binding;
826 struct intel_buffer_object *intel_bo;
827
828 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
829 intel_bo = intel_buffer_object(binding->BufferObject);
830 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
831
832 /* Because behavior for referencing outside of the binding's size in the
833 * glBindBufferRange case is undefined, we can just bind the whole buffer
834 * glBindBufferBase wants and be a correct implementation.
835 */
836 brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
837 bo->size - binding->Offset,
838 &surf_offsets[i],
839 shader->Type == GL_FRAGMENT_SHADER);
840 }
841
842 if (shader->NumUniformBlocks)
843 brw->state.dirty.brw |= BRW_NEW_SURFACES;
844 }
845
846 static void
847 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
848 {
849 struct gl_context *ctx = &brw->intel.ctx;
850 /* _NEW_PROGRAM */
851 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
852
853 if (!prog)
854 return;
855
856 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
857 &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
858 }
859
860 const struct brw_tracked_state brw_wm_ubo_surfaces = {
861 .dirty = {
862 .mesa = _NEW_PROGRAM,
863 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
864 .cache = 0,
865 },
866 .emit = brw_upload_wm_ubo_surfaces,
867 };
868
869 /**
870 * Constructs the binding table for the WM surface state, which maps unit
871 * numbers to surface state objects.
872 */
873 static void
874 brw_upload_wm_binding_table(struct brw_context *brw)
875 {
876 uint32_t *bind;
877 int i;
878
879 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
880 gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
881 }
882
883 /* Might want to calculate nr_surfaces first, to avoid taking up so much
884 * space for the binding table.
885 */
886 bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
887 sizeof(uint32_t) * BRW_MAX_WM_SURFACES,
888 32, &brw->wm.bind_bo_offset);
889
890 /* BRW_NEW_SURFACES */
891 for (i = 0; i < BRW_MAX_WM_SURFACES; i++) {
892 bind[i] = brw->wm.surf_offset[i];
893 }
894
895 brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
896 }
897
898 const struct brw_tracked_state brw_wm_binding_table = {
899 .dirty = {
900 .mesa = 0,
901 .brw = (BRW_NEW_BATCH |
902 BRW_NEW_SURFACES),
903 .cache = 0
904 },
905 .emit = brw_upload_wm_binding_table,
906 };
907
908 void
909 gen4_init_vtable_surface_functions(struct brw_context *brw)
910 {
911 brw->vtbl.update_texture_surface = brw_update_texture_surface;
912 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
913 brw->vtbl.update_null_renderbuffer_surface =
914 brw_update_null_renderbuffer_surface;
915 brw->vtbl.create_constant_surface = brw_create_constant_surface;
916 }