i965: Move data from brw->vs into a base class if gs will also need it.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
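/* Note that the array targets (1D_ARRAY, 2D_ARRAY, CUBE_MAP_ARRAY) map to the
 * same surface type as their non-array counterparts; the number of layers is
 * expressed through the surface's Depth field instead.  Rectangle textures
 * likewise become plain 2D surfaces, with their non-normalized coordinates
 * handled elsewhere (in sampler state or the compiled shader, depending on
 * generation) rather than by the surface type.
 */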
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
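/* The pre-Gen7 SURFACE_STATE encoding used here only distinguishes 1x from 4x
 * sampling.  Gen6, the only hardware covered by this file that supports MSAA
 * at all, only offers 4x, so any num_samples > 1 collapses to
 * MULTISAMPLECOUNT_4.
 */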
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169 * force the alpha value read to 1.0. This allows for the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
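/* Example of how the two layers compose: a GL_DEPTH_COMPONENT texture with
 * DepthMode GL_ALPHA leaves swizzles[] as { ZERO, ZERO, ZERO, X, ZERO, ONE,
 * NIL }.  If the application also sets TEXTURE_SWIZZLE_RGBA to (ALPHA, ALPHA,
 * ALPHA, ALPHA), t->_Swizzle selects component 3 for every channel, so the
 * result is MAKE_SWIZZLE4(X, X, X, X) and the depth value is returned in all
 * four channels.
 */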
192
193
194 static void
195 brw_update_buffer_texture_surface(struct gl_context *ctx,
196 unsigned unit,
197 uint32_t *binding_table,
198 unsigned surf_index)
199 {
200 struct brw_context *brw = brw_context(ctx);
201 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
202 uint32_t *surf;
203 struct intel_buffer_object *intel_obj =
204 intel_buffer_object(tObj->BufferObject);
205 drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
206 gl_format format = tObj->_BufferObjectFormat;
207 uint32_t brw_format = brw_format_for_mesa_format(format);
208 int texel_size = _mesa_get_format_bytes(format);
209
210 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
211 _mesa_problem(NULL, "bad format %s for texture buffer\n",
212 _mesa_get_format_name(format));
213 }
214
215 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
216 6 * 4, 32, &binding_table[surf_index]);
217
218 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
219                 (brw_format << BRW_SURFACE_FORMAT_SHIFT));
220
221 if (brw->gen >= 6)
222 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
223
224 if (bo) {
225 surf[1] = bo->offset; /* reloc */
226
227 /* Emit relocation to surface contents. */
228 drm_intel_bo_emit_reloc(brw->batch.bo,
229 binding_table[surf_index] + 4,
230 bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
231
232 int w = intel_obj->Base.Size / texel_size;
233 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
234 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
235 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
236 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
237 } else {
238 surf[1] = 0;
239 surf[2] = 0;
240 surf[3] = 0;
241 }
242
243 surf[4] = 0;
244 surf[5] = 0;
245 }
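/* The texel count w computed above is spread across the Width (bits 6:0 of w),
 * Height (bits 19:7) and Depth (bits 26:20) surface fields, for a limit of
 * 2^27 texels.  For example, w = 100000 encodes as Width = 32, Height = 781,
 * Depth = 0, since 781 * 128 + 32 = 100000.
 */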
246
247 static void
248 brw_update_texture_surface(struct gl_context *ctx,
249 unsigned unit,
250 uint32_t *binding_table,
251 unsigned surf_index)
252 {
253 struct brw_context *brw = brw_context(ctx);
254 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
255 struct intel_texture_object *intelObj = intel_texture_object(tObj);
256 struct intel_mipmap_tree *mt = intelObj->mt;
257 struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
258 struct intel_texture_image *intel_image = intel_texture_image(firstImage);
259 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
260 uint32_t *surf;
261
262 if (tObj->Target == GL_TEXTURE_BUFFER) {
263 brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
264 return;
265 }
266
267 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
268 6 * 4, 32, &binding_table[surf_index]);
269
270 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
271 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
272 BRW_SURFACE_CUBEFACE_ENABLES |
273 (translate_tex_format(brw,
274 mt->format,
275 tObj->DepthMode,
276 sampler->sRGBDecode) <<
277 BRW_SURFACE_FORMAT_SHIFT));
278
279 surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
280
281 surf[2] = ((intelObj->_MaxLevel - intel_image->mt->first_level) << BRW_SURFACE_LOD_SHIFT |
282 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
283 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
284
285 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
286 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
287 (intelObj->mt->region->pitch - 1) <<
288 BRW_SURFACE_PITCH_SHIFT);
289
290 surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);
291
292 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
293
294 /* Emit relocation to surface contents */
295 drm_intel_bo_emit_reloc(brw->batch.bo,
296 binding_table[surf_index] + 4,
297 intelObj->mt->region->bo,
298 surf[1] - intelObj->mt->region->bo->offset,
299 I915_GEM_DOMAIN_SAMPLER, 0);
300 }
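/* The relocation pattern above recurs throughout this file: surf[1] is written
 * with the bo's presumed graphics address plus the offset into it, and
 * drm_intel_bo_emit_reloc() records that the dword at
 * binding_table[surf_index] + 4 points into that bo with delta
 * surf[1] - bo->offset, so the kernel can patch the address if the buffer gets
 * relocated at execbuffer time.
 */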
301
302 /**
303 * Create the constant buffer surface. Vertex/fragment shader constants will be
304 * read from this buffer with Data Port Read instructions/messages.
305 */
306 static void
307 brw_create_constant_surface(struct brw_context *brw,
308 drm_intel_bo *bo,
309 uint32_t offset,
310 uint32_t size,
311 uint32_t *out_offset,
312 bool dword_pitch)
313 {
314 uint32_t stride = dword_pitch ? 4 : 16;
315 uint32_t elements = ALIGN(size, stride) / stride;
316 const GLint w = elements - 1;
317 uint32_t *surf;
318
319 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
320 6 * 4, 32, out_offset);
321
322 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
323 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
324 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
325
326 if (brw->gen >= 6)
327 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
328
329 surf[1] = bo->offset + offset; /* reloc */
330
331 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
332 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
333
334 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
335 (stride - 1) << BRW_SURFACE_PITCH_SHIFT);
336
337 surf[4] = 0;
338 surf[5] = 0;
339
340 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
341 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
342 * physical cache. It is mapped in hardware to the sampler cache."
343 */
344 drm_intel_bo_emit_reloc(brw->batch.bo,
345 *out_offset + 4,
346 bo, offset,
347 I915_GEM_DOMAIN_SAMPLER, 0);
348 }
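/* dword_pitch controls the element size of the constant buffer: a 4-byte pitch
 * makes each element a single dword (used for fragment shader constants),
 * while a 16-byte pitch makes each element a full vec4 (used by the vec4
 * backends).  The choice has to match how the compiled shader computes its
 * pull-constant and UBO offsets; the callers in this file pass true for
 * fragment shaders and false otherwise.
 */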
349
350 /**
351 * Set up a binding table entry for use by stream output logic (transform
352 * feedback).
353 *
354  * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
355 */
356 void
357 brw_update_sol_surface(struct brw_context *brw,
358 struct gl_buffer_object *buffer_obj,
359 uint32_t *out_offset, unsigned num_vector_components,
360 unsigned stride_dwords, unsigned offset_dwords)
361 {
362 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
363 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
364 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
365 out_offset);
366 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
367 uint32_t offset_bytes = 4 * offset_dwords;
368 size_t size_dwords = buffer_obj->Size / 4;
369 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
370
371 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
372 * too big to map using a single binding table entry?
373 */
374 assert((size_dwords - offset_dwords) / stride_dwords
375 <= BRW_MAX_NUM_BUFFER_ENTRIES);
376
377 if (size_dwords > offset_dwords + num_vector_components) {
378 /* There is room for at least 1 transform feedback output in the buffer.
379 * Compute the number of additional transform feedback outputs the
380 * buffer has room for.
381 */
382 buffer_size_minus_1 =
383 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
384 } else {
385 /* There isn't even room for a single transform feedback output in the
386 * buffer. We can't configure the binding table entry to prevent output
387 * entirely; we'll have to rely on the geometry shader to detect
388 * overflow. But to minimize the damage in case of a bug, set up the
389 * binding table entry to just allow a single output.
390 */
391 buffer_size_minus_1 = 0;
392 }
393 width = buffer_size_minus_1 & 0x7f;
394 height = (buffer_size_minus_1 & 0xfff80) >> 7;
395 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
396
397 switch (num_vector_components) {
398 case 1:
399 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
400 break;
401 case 2:
402 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
403 break;
404 case 3:
405 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
406 break;
407 case 4:
408 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
409 break;
410 default:
411 assert(!"Invalid vector size for transform feedback output");
412 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
413 break;
414 }
415
416 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
417 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
418 surface_format << BRW_SURFACE_FORMAT_SHIFT |
419 BRW_SURFACE_RC_READ_WRITE;
420 surf[1] = bo->offset + offset_bytes; /* reloc */
421 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
422 height << BRW_SURFACE_HEIGHT_SHIFT);
423 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
424 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
425 surf[4] = 0;
426 surf[5] = 0;
427
428 /* Emit relocation to surface contents. */
429 drm_intel_bo_emit_reloc(brw->batch.bo,
430 *out_offset + 4,
431 bo, offset_bytes,
432 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
433 }
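/* Worked example: a 4096-byte buffer (size_dwords = 1024) bound at offset 0,
 * written with a 16-dword stride and a vec4 (4-component) output, gives
 * buffer_size_minus_1 = (1024 - 0 - 4) / 16 = 63, i.e. entries 0..63 are
 * allowed, exactly the 64 complete vertices that fit in the buffer.  The value
 * is then split into the Width/Height/Depth fields just like the other buffer
 * surfaces in this file.
 */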
434
435 /* Creates a new WM constant buffer reflecting the current fragment program's
436 * constants, if needed by the fragment program.
437 *
438 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
439 * state atom.
440 */
441 static void
442 brw_upload_wm_pull_constants(struct brw_context *brw)
443 {
444 struct gl_context *ctx = &brw->ctx;
445 /* BRW_NEW_FRAGMENT_PROGRAM */
446 struct brw_fragment_program *fp =
447 (struct brw_fragment_program *) brw->fragment_program;
448 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
449 const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
450 const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
451 float *constants;
452 unsigned int i;
453
454 _mesa_load_state_parameters(ctx, params);
455
456 /* CACHE_NEW_WM_PROG */
457 if (brw->wm.prog_data->nr_pull_params == 0) {
458 if (brw->wm.const_bo) {
459 drm_intel_bo_unreference(brw->wm.const_bo);
460 brw->wm.const_bo = NULL;
461 brw->wm.surf_offset[surf_index] = 0;
462 brw->state.dirty.brw |= BRW_NEW_SURFACES;
463 }
464 return;
465 }
466
467 drm_intel_bo_unreference(brw->wm.const_bo);
468 brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
469 size, 64);
470
471 /* _NEW_PROGRAM_CONSTANTS */
472 drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
473 constants = brw->wm.const_bo->virtual;
474 for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
475 constants[i] = *brw->wm.prog_data->pull_param[i];
476 }
477 drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
478
479 brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
480 &brw->wm.surf_offset[surf_index],
481 true);
482
483 brw->state.dirty.brw |= BRW_NEW_SURFACES;
484 }
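/* A fresh const_bo is allocated on every upload rather than rewriting the old
 * one, presumably because the previous buffer may still be referenced by
 * commands already queued in the batch; dropping our reference lets it be
 * freed once the GPU is done with it.
 */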
485
486 const struct brw_tracked_state brw_wm_pull_constants = {
487 .dirty = {
488 .mesa = (_NEW_PROGRAM_CONSTANTS),
489 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
490 .cache = CACHE_NEW_WM_PROG,
491 },
492 .emit = brw_upload_wm_pull_constants,
493 };
494
495 static void
496 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
497 {
498    /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
499 * Notes):
500 *
501 * A null surface will be used in instances where an actual surface is
502 * not bound. When a write message is generated to a null surface, no
503 * actual surface is written to. When a read message (including any
504 * sampling engine message) is generated to a null surface, the result
505 * is all zeros. Note that a null surface type is allowed to be used
506     *     with all messages, even if it is not specifically indicated as
507 * supported. All of the remaining fields in surface state are ignored
508 * for null surfaces, with the following exceptions:
509 *
510 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
511 * depth buffer’s corresponding state for all render target surfaces,
512 * including null.
513 *
514 * - Surface Format must be R8G8B8A8_UNORM.
515 */
516 struct gl_context *ctx = &brw->ctx;
517 uint32_t *surf;
518 unsigned surface_type = BRW_SURFACE_NULL;
519 drm_intel_bo *bo = NULL;
520 unsigned pitch_minus_1 = 0;
521 uint32_t multisampling_state = 0;
522
523 /* _NEW_BUFFERS */
524 const struct gl_framebuffer *fb = ctx->DrawBuffer;
525
526 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
527 &brw->wm.surf_offset[SURF_INDEX_DRAW(unit)]);
528
529 if (fb->Visual.samples > 1) {
530 /* On Gen6, null render targets seem to cause GPU hangs when
531        * multisampling. So work around this problem by rendering into a dummy
532 * color buffer.
533 *
534 * To decrease the amount of memory needed by the workaround buffer, we
535 * set its pitch to 128 bytes (the width of a Y tile). This means that
536 * the amount of memory needed for the workaround buffer is
537 * (width_in_tiles + height_in_tiles - 1) tiles.
538 *
539 * Note that since the workaround buffer will be interpreted by the
540 * hardware as an interleaved multisampled buffer, we need to compute
541 * width_in_tiles and height_in_tiles by dividing the width and height
542 * by 16 rather than the normal Y-tile size of 32.
543 */
544 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
545 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
546 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
547 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
548 size_needed);
549 bo = brw->wm.multisampled_null_render_target_bo;
550 surface_type = BRW_SURFACE_2D;
551 pitch_minus_1 = 127;
552 multisampling_state =
553 brw_get_surface_num_multisamples(fb->Visual.samples);
554 }
555
556 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
557 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
558 if (brw->gen < 6) {
559 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
560 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
561 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
562 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
563 }
564 surf[1] = bo ? bo->offset : 0;
565 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
566 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
567
568    /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
569 * Notes):
570 *
571 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
572 */
573 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
574 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
575 surf[4] = multisampling_state;
576 surf[5] = 0;
577
578 if (bo) {
579 drm_intel_bo_emit_reloc(brw->batch.bo,
580 brw->wm.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
581 bo, 0,
582 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
583 }
584 }
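/* To put numbers on the multisampled workaround buffer above: for a 1920x1080
 * framebuffer, width_in_tiles = 120 and height_in_tiles = 68, so size_needed
 * is (120 + 68 - 1) * 4096 = 765952 bytes, far less than allocating a real
 * dummy color buffer of that size would cost.
 */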
585
586 /**
587 * Sets up a surface state structure to point at the given region.
588 * While it is only used for the front/back buffer currently, it should be
589  * usable for other buffers when doing ARB_draw_buffers support.
590 */
591 static void
592 brw_update_renderbuffer_surface(struct brw_context *brw,
593 struct gl_renderbuffer *rb,
594 bool layered,
595 unsigned int unit)
596 {
597 struct gl_context *ctx = &brw->ctx;
598 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
599 struct intel_mipmap_tree *mt = irb->mt;
600 struct intel_region *region;
601 uint32_t *surf;
602 uint32_t tile_x, tile_y;
603 uint32_t format = 0;
604 /* _NEW_BUFFERS */
605 gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
606
607 assert(!layered);
608
609 if (rb->TexImage && !brw->has_surface_tile_offset) {
610 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
611
612 if (tile_x != 0 || tile_y != 0) {
613 /* Original gen4 hardware couldn't draw to a non-tile-aligned
614        * destination in a miptree unless you actually set up your renderbuffer
615 * as a miptree and used the fragile lod/array_index/etc. controls to
616 * select the image. So, instead, we just make a new single-level
617 * miptree and render into that.
618 */
619 intel_renderbuffer_move_to_temp(brw, irb, false);
620 mt = irb->mt;
621 }
622 }
623
624 intel_miptree_used_for_rendering(irb->mt);
625
626 region = irb->mt->region;
627
628 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
629 &brw->wm.surf_offset[SURF_INDEX_DRAW(unit)]);
630
631 format = brw->render_target_format[rb_format];
632 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
633 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
634 __FUNCTION__, _mesa_get_format_name(rb_format));
635 }
636
637 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
638 format << BRW_SURFACE_FORMAT_SHIFT);
639
640 /* reloc */
641 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
642 region->bo->offset);
643
644 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
645 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
646
647 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
648 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
649
650 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
651
652 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
653 /* Note that the low bits of these fields are missing, so
654 * there's the possibility of getting in trouble.
655 */
656 assert(tile_x % 4 == 0);
657 assert(tile_y % 2 == 0);
658 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
659 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
660 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
661
662 if (brw->gen < 6) {
663 /* _NEW_COLOR */
664 if (!ctx->Color.ColorLogicOpEnabled &&
665 (ctx->Color.BlendEnabled & (1 << unit)))
666 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
667
668 if (!ctx->Color.ColorMask[unit][0])
669 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
670 if (!ctx->Color.ColorMask[unit][1])
671 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
672 if (!ctx->Color.ColorMask[unit][2])
673 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
674
675       /* Disable writes to the alpha component when the renderbuffer has no
676        * alpha channel (XRGB) or the alpha channel is masked off.
677 */
678 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
679 !ctx->Color.ColorMask[unit][3]) {
680 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
681 }
682 }
683
684 drm_intel_bo_emit_reloc(brw->batch.bo,
685 brw->wm.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
686 region->bo,
687 surf[1] - region->bo->offset,
688 I915_GEM_DOMAIN_RENDER,
689 I915_GEM_DOMAIN_RENDER);
690 }
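/* The X/Y offsets written to surf[5] are stored in units of 4 pixels and 2
 * rows, which is why the asserts above require tile_x and tile_y to be
 * multiples of 4 and 2: an image whose intra-tile offset is not aligned that
 * way cannot be addressed exactly (the "low bits ... missing" noted above),
 * and hardware without these offset fields at all takes the move-to-temp path
 * at the top of the function instead.
 */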
691
692 /**
693 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
694 */
695 static void
696 brw_update_renderbuffer_surfaces(struct brw_context *brw)
697 {
698 struct gl_context *ctx = &brw->ctx;
699 GLuint i;
700
701 /* _NEW_BUFFERS | _NEW_COLOR */
702 /* Update surfaces for drawing buffers */
703 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
704 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
705 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
706 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
707 ctx->DrawBuffer->Layered, i);
708 } else {
709 brw->vtbl.update_null_renderbuffer_surface(brw, i);
710 }
711 }
712 } else {
713 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
714 }
715 brw->state.dirty.brw |= BRW_NEW_SURFACES;
716 }
717
718 const struct brw_tracked_state brw_renderbuffer_surfaces = {
719 .dirty = {
720 .mesa = (_NEW_COLOR |
721 _NEW_BUFFERS),
722 .brw = BRW_NEW_BATCH,
723 .cache = 0
724 },
725 .emit = brw_update_renderbuffer_surfaces,
726 };
727
728 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
729 .dirty = {
730 .mesa = _NEW_BUFFERS,
731 .brw = BRW_NEW_BATCH,
732 .cache = 0
733 },
734 .emit = brw_update_renderbuffer_surfaces,
735 };
736
737 /**
738 * Construct SURFACE_STATE objects for enabled textures.
739 */
740 static void
741 brw_update_texture_surfaces(struct brw_context *brw)
742 {
743 struct gl_context *ctx = &brw->ctx;
744
745 /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
746 * Unfortunately, we're stuck using the gl_program structs until the
747 * ARB_fragment_program front-end gets converted to GLSL IR. These
748 * have the downside that SamplerUnits is split and only contains the
749 * mappings for samplers active in that stage.
750 */
751 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
752 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
753
754 unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);
755
756 for (unsigned s = 0; s < num_samplers; s++) {
757 brw->vs.base.surf_offset[SURF_INDEX_VEC4_TEXTURE(s)] = 0;
758 brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;
759
760 if (vs->SamplersUsed & (1 << s)) {
761 const unsigned unit = vs->SamplerUnits[s];
762
763 /* _NEW_TEXTURE */
764 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
765 brw->vtbl.update_texture_surface(ctx, unit,
766 brw->vs.base.surf_offset,
767 SURF_INDEX_VEC4_TEXTURE(s));
768 }
769 }
770
771 if (fs->SamplersUsed & (1 << s)) {
772 const unsigned unit = fs->SamplerUnits[s];
773
774 /* _NEW_TEXTURE */
775 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
776 brw->vtbl.update_texture_surface(ctx, unit,
777 brw->wm.surf_offset,
778 SURF_INDEX_TEXTURE(s));
779 }
780 }
781 }
782
783 brw->state.dirty.brw |= BRW_NEW_SURFACES;
784 }
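/* Note that the two stages record their SURFACE_STATE offsets in different
 * places: vertex shader textures land in brw->vs.base.surf_offset, the
 * per-stage base-class storage the commit above moves data into so a geometry
 * shader stage can share it, while fragment shader textures use
 * brw->wm.surf_offset.  The binding table indices differ as well, via
 * SURF_INDEX_VEC4_TEXTURE() versus SURF_INDEX_TEXTURE().
 */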
785
786 const struct brw_tracked_state brw_texture_surfaces = {
787 .dirty = {
788 .mesa = _NEW_TEXTURE,
789 .brw = BRW_NEW_BATCH |
790 BRW_NEW_VERTEX_PROGRAM |
791 BRW_NEW_FRAGMENT_PROGRAM,
792 .cache = 0
793 },
794 .emit = brw_update_texture_surfaces,
795 };
796
797 void
798 brw_upload_ubo_surfaces(struct brw_context *brw,
799 struct gl_shader *shader,
800 uint32_t *surf_offsets)
801 {
802 struct gl_context *ctx = &brw->ctx;
803
804 if (!shader)
805 return;
806
807 for (int i = 0; i < shader->NumUniformBlocks; i++) {
808 struct gl_uniform_buffer_binding *binding;
809 struct intel_buffer_object *intel_bo;
810
811 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
812 intel_bo = intel_buffer_object(binding->BufferObject);
813 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
814
815 /* Because behavior for referencing outside of the binding's size in the
816 * glBindBufferRange case is undefined, we can just bind the whole buffer
817        * (as glBindBufferBase would) and still be a correct implementation.
818 */
819 brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
820 bo->size - binding->Offset,
821 &surf_offsets[i],
822 shader->Type == GL_FRAGMENT_SHADER);
823 }
824
825 if (shader->NumUniformBlocks)
826 brw->state.dirty.brw |= BRW_NEW_SURFACES;
827 }
828
829 static void
830 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
831 {
832 struct gl_context *ctx = &brw->ctx;
833 /* _NEW_PROGRAM */
834 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
835
836 if (!prog)
837 return;
838
839 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
840 &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
841 }
842
843 const struct brw_tracked_state brw_wm_ubo_surfaces = {
844 .dirty = {
845 .mesa = _NEW_PROGRAM,
846 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
847 .cache = 0,
848 },
849 .emit = brw_upload_wm_ubo_surfaces,
850 };
851
852 /**
853 * Constructs the binding table for the WM surface state, which maps unit
854 * numbers to surface state objects.
855 */
856 static void
857 brw_upload_wm_binding_table(struct brw_context *brw)
858 {
859 uint32_t *bind;
860 int i;
861
862 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
863 gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
864 }
865
866 /* CACHE_NEW_WM_PROG */
867 unsigned entries = brw->wm.prog_data->binding_table_size;
868 bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
869 sizeof(uint32_t) * entries,
870 32, &brw->wm.bind_bo_offset);
871
872 /* BRW_NEW_SURFACES */
873 for (i = 0; i < entries; i++) {
874 bind[i] = brw->wm.surf_offset[i];
875 }
876
877 brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
878 }
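/* The binding table itself is just an array of 32-bit offsets into the batch's
 * surface-state space, indexed by the binding table index baked into the
 * compiled shader's sampler and data-port messages.  binding_table_size comes
 * from the WM program compile, so only as many entries as the shader can
 * actually reference get uploaded.
 */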
879
880 const struct brw_tracked_state brw_wm_binding_table = {
881 .dirty = {
882 .mesa = 0,
883 .brw = (BRW_NEW_BATCH |
884 BRW_NEW_SURFACES),
885 .cache = CACHE_NEW_WM_PROG
886 },
887 .emit = brw_upload_wm_binding_table,
888 };
889
890 void
891 gen4_init_vtable_surface_functions(struct brw_context *brw)
892 {
893 brw->vtbl.update_texture_surface = brw_update_texture_surface;
894 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
895 brw->vtbl.update_null_renderbuffer_surface =
896 brw_update_null_renderbuffer_surface;
897 brw->vtbl.create_constant_surface = brw_create_constant_surface;
898 }
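/* These vtable hooks let the generation-independent state atoms above (the
 * texture, renderbuffer, UBO and pull-constant uploads) stay shared while
 * newer generations plug in their own SURFACE_STATE emission; the functions in
 * this file provide the Gen4-6 layouts.
 */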