i965: Use the new drm_intel_bo offset64 field.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169 * force the alpha value read to 1.0. This allows for the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
192
193 static void
194 gen4_emit_buffer_surface_state(struct brw_context *brw,
195 uint32_t *out_offset,
196 drm_intel_bo *bo,
197 unsigned buffer_offset,
198 unsigned surface_format,
199 unsigned buffer_size,
200 unsigned pitch,
201 unsigned mocs,
202 bool rw)
203 {
204 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
205 6 * 4, 32, out_offset);
206 memset(surf, 0, 6 * 4);
207
208 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
209 surface_format << BRW_SURFACE_FORMAT_SHIFT |
210 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
211 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
212 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
213 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
214 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
215 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
216
217 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
218 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
219 * physical cache. It is mapped in hardware to the sampler cache."
220 */
221 if (bo) {
222 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
223 bo, buffer_offset,
224 I915_GEM_DOMAIN_SAMPLER,
225 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
226 }
227 }
228
229 void
230 brw_update_buffer_texture_surface(struct gl_context *ctx,
231 unsigned unit,
232 uint32_t *surf_offset)
233 {
234 struct brw_context *brw = brw_context(ctx);
235 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
236 struct intel_buffer_object *intel_obj =
237 intel_buffer_object(tObj->BufferObject);
238 uint32_t size = tObj->BufferSize;
239 drm_intel_bo *bo = NULL;
240 gl_format format = tObj->_BufferObjectFormat;
241 uint32_t brw_format = brw_format_for_mesa_format(format);
242 int texel_size = _mesa_get_format_bytes(format);
243
244 if (intel_obj) {
245 size = MIN2(size, intel_obj->Base.Size);
246 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
247 }
248
249 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
250 _mesa_problem(NULL, "bad format %s for texture buffer\n",
251 _mesa_get_format_name(format));
252 }
253
254 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
255 tObj->BufferOffset,
256 brw_format,
257 size / texel_size,
258 texel_size,
259 0, /* mocs */
260 false /* rw */);
261 }
262
263 static void
264 brw_update_texture_surface(struct gl_context *ctx,
265 unsigned unit,
266 uint32_t *surf_offset,
267 bool for_gather)
268 {
269 struct brw_context *brw = brw_context(ctx);
270 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
271 struct intel_texture_object *intelObj = intel_texture_object(tObj);
272 struct intel_mipmap_tree *mt = intelObj->mt;
273 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
274 uint32_t *surf;
275
276 /* BRW_NEW_UNIFORM_BUFFER */
277 if (tObj->Target == GL_TEXTURE_BUFFER) {
278 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
279 return;
280 }
281
282 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
283 6 * 4, 32, surf_offset);
284
285 (void) for_gather; /* no w/a to apply for this gen */
286
287 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
288 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
289 BRW_SURFACE_CUBEFACE_ENABLES |
290 (translate_tex_format(brw,
291 mt->format,
292 sampler->sRGBDecode) <<
293 BRW_SURFACE_FORMAT_SHIFT));
294
295 surf[1] = intelObj->mt->region->bo->offset64 + intelObj->mt->offset; /* reloc */
296
297 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
298 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
299 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
300
301 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
302 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
303 (intelObj->mt->region->pitch - 1) <<
304 BRW_SURFACE_PITCH_SHIFT);
305
306 surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
307 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
308
309 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
310
311 /* Emit relocation to surface contents */
312 drm_intel_bo_emit_reloc(brw->batch.bo,
313 *surf_offset + 4,
314 intelObj->mt->region->bo,
315 surf[1] - intelObj->mt->region->bo->offset64,
316 I915_GEM_DOMAIN_SAMPLER, 0);
317 }
318
319 /**
320 * Create the constant buffer surface. Vertex/fragment shader constants will be
321 * read from this buffer with Data Port Read instructions/messages.
322 */
323 void
324 brw_create_constant_surface(struct brw_context *brw,
325 drm_intel_bo *bo,
326 uint32_t offset,
327 uint32_t size,
328 uint32_t *out_offset,
329 bool dword_pitch)
330 {
331 uint32_t stride = dword_pitch ? 4 : 16;
332 uint32_t elements = ALIGN(size, stride) / stride;
333
334 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
335 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
336 elements, stride, 0, false);
337 }
338
339 /**
340 * Set up a binding table entry for use by stream output logic (transform
341 * feedback).
342 *
343 * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
344 */
345 void
346 brw_update_sol_surface(struct brw_context *brw,
347 struct gl_buffer_object *buffer_obj,
348 uint32_t *out_offset, unsigned num_vector_components,
349 unsigned stride_dwords, unsigned offset_dwords)
350 {
351 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
352 uint32_t offset_bytes = 4 * offset_dwords;
353 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
354 offset_bytes,
355 buffer_obj->Size - offset_bytes);
356 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
357 out_offset);
358 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
359 size_t size_dwords = buffer_obj->Size / 4;
360 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
361
362 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
363 * too big to map using a single binding table entry?
364 */
365 assert((size_dwords - offset_dwords) / stride_dwords
366 <= BRW_MAX_NUM_BUFFER_ENTRIES);
367
368 if (size_dwords > offset_dwords + num_vector_components) {
369 /* There is room for at least 1 transform feedback output in the buffer.
370 * Compute the number of additional transform feedback outputs the
371 * buffer has room for.
372 */
373 buffer_size_minus_1 =
374 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
375 } else {
376 /* There isn't even room for a single transform feedback output in the
377 * buffer. We can't configure the binding table entry to prevent output
378 * entirely; we'll have to rely on the geometry shader to detect
379 * overflow. But to minimize the damage in case of a bug, set up the
380 * binding table entry to just allow a single output.
381 */
382 buffer_size_minus_1 = 0;
383 }
384 width = buffer_size_minus_1 & 0x7f;
385 height = (buffer_size_minus_1 & 0xfff80) >> 7;
386 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
387
388 switch (num_vector_components) {
389 case 1:
390 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
391 break;
392 case 2:
393 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
394 break;
395 case 3:
396 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
397 break;
398 case 4:
399 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
400 break;
401 default:
402 assert(!"Invalid vector size for transform feedback output");
403 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
404 break;
405 }
406
407 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
408 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
409 surface_format << BRW_SURFACE_FORMAT_SHIFT |
410 BRW_SURFACE_RC_READ_WRITE;
411 surf[1] = bo->offset64 + offset_bytes; /* reloc */
412 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
413 height << BRW_SURFACE_HEIGHT_SHIFT);
414 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
415 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
416 surf[4] = 0;
417 surf[5] = 0;
418
419 /* Emit relocation to surface contents. */
420 drm_intel_bo_emit_reloc(brw->batch.bo,
421 *out_offset + 4,
422 bo, offset_bytes,
423 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
424 }
425
426 /* Creates a new WM constant buffer reflecting the current fragment program's
427 * constants, if needed by the fragment program.
428 *
429 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
430 * state atom.
431 */
432 static void
433 brw_upload_wm_pull_constants(struct brw_context *brw)
434 {
435 struct gl_context *ctx = &brw->ctx;
436 /* BRW_NEW_FRAGMENT_PROGRAM */
437 struct brw_fragment_program *fp =
438 (struct brw_fragment_program *) brw->fragment_program;
439 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
440 const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
441 const int surf_index =
442 brw->wm.prog_data->base.binding_table.pull_constants_start;
443 float *constants;
444 unsigned int i;
445
446 _mesa_load_state_parameters(ctx, params);
447
448 /* CACHE_NEW_WM_PROG */
449 if (brw->wm.prog_data->nr_pull_params == 0) {
450 if (brw->wm.base.const_bo) {
451 drm_intel_bo_unreference(brw->wm.base.const_bo);
452 brw->wm.base.const_bo = NULL;
453 brw->wm.base.surf_offset[surf_index] = 0;
454 brw->state.dirty.brw |= BRW_NEW_SURFACES;
455 }
456 return;
457 }
458
459 drm_intel_bo_unreference(brw->wm.base.const_bo);
460 brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
461 size, 64);
462
463 /* _NEW_PROGRAM_CONSTANTS */
464 drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
465 constants = brw->wm.base.const_bo->virtual;
466 for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
467 constants[i] = *brw->wm.prog_data->pull_param[i];
468 }
469 drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
470
471 brw_create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
472 &brw->wm.base.surf_offset[surf_index],
473 true);
474
475 brw->state.dirty.brw |= BRW_NEW_SURFACES;
476 }
477
478 const struct brw_tracked_state brw_wm_pull_constants = {
479 .dirty = {
480 .mesa = (_NEW_PROGRAM_CONSTANTS),
481 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
482 .cache = CACHE_NEW_WM_PROG,
483 },
484 .emit = brw_upload_wm_pull_constants,
485 };
486
487 static void
488 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
489 {
490 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
491 * Notes):
492 *
493 * A null surface will be used in instances where an actual surface is
494 * not bound. When a write message is generated to a null surface, no
495 * actual surface is written to. When a read message (including any
496 * sampling engine message) is generated to a null surface, the result
497 * is all zeros. Note that a null surface type is allowed to be used
498 * with all messages, even if it is not specificially indicated as
499 * supported. All of the remaining fields in surface state are ignored
500 * for null surfaces, with the following exceptions:
501 *
502 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
503 * depth buffer’s corresponding state for all render target surfaces,
504 * including null.
505 *
506 * - Surface Format must be R8G8B8A8_UNORM.
507 */
508 struct gl_context *ctx = &brw->ctx;
509 uint32_t *surf;
510 unsigned surface_type = BRW_SURFACE_NULL;
511 drm_intel_bo *bo = NULL;
512 unsigned pitch_minus_1 = 0;
513 uint32_t multisampling_state = 0;
514 uint32_t surf_index =
515 brw->wm.prog_data->binding_table.render_target_start + unit;
516
517 /* _NEW_BUFFERS */
518 const struct gl_framebuffer *fb = ctx->DrawBuffer;
519
520 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
521 &brw->wm.base.surf_offset[surf_index]);
522
523 if (fb->Visual.samples > 1) {
524 /* On Gen6, null render targets seem to cause GPU hangs when
525 * multisampling. So work around this problem by rendering into dummy
526 * color buffer.
527 *
528 * To decrease the amount of memory needed by the workaround buffer, we
529 * set its pitch to 128 bytes (the width of a Y tile). This means that
530 * the amount of memory needed for the workaround buffer is
531 * (width_in_tiles + height_in_tiles - 1) tiles.
532 *
533 * Note that since the workaround buffer will be interpreted by the
534 * hardware as an interleaved multisampled buffer, we need to compute
535 * width_in_tiles and height_in_tiles by dividing the width and height
536 * by 16 rather than the normal Y-tile size of 32.
537 */
538 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
539 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
540 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
541 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
542 size_needed);
543 bo = brw->wm.multisampled_null_render_target_bo;
544 surface_type = BRW_SURFACE_2D;
545 pitch_minus_1 = 127;
546 multisampling_state =
547 brw_get_surface_num_multisamples(fb->Visual.samples);
548 }
549
550 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
551 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
552 if (brw->gen < 6) {
553 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
554 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
555 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
556 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
557 }
558 surf[1] = bo ? bo->offset64 : 0;
559 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
560 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
561
562 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
563 * Notes):
564 *
565 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
566 */
567 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
568 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
569 surf[4] = multisampling_state;
570 surf[5] = 0;
571
572 if (bo) {
573 drm_intel_bo_emit_reloc(brw->batch.bo,
574 brw->wm.base.surf_offset[surf_index] + 4,
575 bo, 0,
576 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
577 }
578 }
579
580 /**
581 * Sets up a surface state structure to point at the given region.
582 * While it is only used for the front/back buffer currently, it should be
583 * usable for further buffers when doing ARB_draw_buffer support.
584 */
585 static void
586 brw_update_renderbuffer_surface(struct brw_context *brw,
587 struct gl_renderbuffer *rb,
588 bool layered,
589 unsigned int unit)
590 {
591 struct gl_context *ctx = &brw->ctx;
592 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
593 struct intel_mipmap_tree *mt = irb->mt;
594 struct intel_region *region;
595 uint32_t *surf;
596 uint32_t tile_x, tile_y;
597 uint32_t format = 0;
598 /* _NEW_BUFFERS */
599 gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
600 uint32_t surf_index =
601 brw->wm.prog_data->binding_table.render_target_start + unit;
602
603 assert(!layered);
604
605 if (rb->TexImage && !brw->has_surface_tile_offset) {
606 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
607
608 if (tile_x != 0 || tile_y != 0) {
609 /* Original gen4 hardware couldn't draw to a non-tile-aligned
610 * destination in a miptree unless you actually setup your renderbuffer
611 * as a miptree and used the fragile lod/array_index/etc. controls to
612 * select the image. So, instead, we just make a new single-level
613 * miptree and render into that.
614 */
615 intel_renderbuffer_move_to_temp(brw, irb, false);
616 mt = irb->mt;
617 }
618 }
619
620 intel_miptree_used_for_rendering(irb->mt);
621
622 region = irb->mt->region;
623
624 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
625 &brw->wm.base.surf_offset[surf_index]);
626
627 format = brw->render_target_format[rb_format];
628 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
629 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
630 __FUNCTION__, _mesa_get_format_name(rb_format));
631 }
632
633 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
634 format << BRW_SURFACE_FORMAT_SHIFT);
635
636 /* reloc */
637 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
638 region->bo->offset64);
639
640 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
641 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
642
643 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
644 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
645
646 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
647
648 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
649 /* Note that the low bits of these fields are missing, so
650 * there's the possibility of getting in trouble.
651 */
652 assert(tile_x % 4 == 0);
653 assert(tile_y % 2 == 0);
654 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
655 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
656 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
657
658 if (brw->gen < 6) {
659 /* _NEW_COLOR */
660 if (!ctx->Color.ColorLogicOpEnabled &&
661 (ctx->Color.BlendEnabled & (1 << unit)))
662 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
663
664 if (!ctx->Color.ColorMask[unit][0])
665 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
666 if (!ctx->Color.ColorMask[unit][1])
667 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
668 if (!ctx->Color.ColorMask[unit][2])
669 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
670
671 /* As mentioned above, disable writes to the alpha component when the
672 * renderbuffer is XRGB.
673 */
674 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
675 !ctx->Color.ColorMask[unit][3]) {
676 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
677 }
678 }
679
680 drm_intel_bo_emit_reloc(brw->batch.bo,
681 brw->wm.base.surf_offset[surf_index] + 4,
682 region->bo,
683 surf[1] - region->bo->offset64,
684 I915_GEM_DOMAIN_RENDER,
685 I915_GEM_DOMAIN_RENDER);
686 }
687
688 /**
689 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
690 */
691 static void
692 brw_update_renderbuffer_surfaces(struct brw_context *brw)
693 {
694 struct gl_context *ctx = &brw->ctx;
695 GLuint i;
696
697 /* _NEW_BUFFERS | _NEW_COLOR */
698 /* Update surfaces for drawing buffers */
699 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
700 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
701 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
702 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
703 ctx->DrawBuffer->MaxNumLayers > 0, i);
704 } else {
705 brw->vtbl.update_null_renderbuffer_surface(brw, i);
706 }
707 }
708 } else {
709 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
710 }
711 brw->state.dirty.brw |= BRW_NEW_SURFACES;
712 }
713
714 const struct brw_tracked_state brw_renderbuffer_surfaces = {
715 .dirty = {
716 .mesa = (_NEW_COLOR |
717 _NEW_BUFFERS),
718 .brw = BRW_NEW_BATCH,
719 .cache = 0
720 },
721 .emit = brw_update_renderbuffer_surfaces,
722 };
723
724 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
725 .dirty = {
726 .mesa = _NEW_BUFFERS,
727 .brw = BRW_NEW_BATCH,
728 .cache = 0
729 },
730 .emit = brw_update_renderbuffer_surfaces,
731 };
732
733
734 static void
735 update_stage_texture_surfaces(struct brw_context *brw,
736 const struct gl_program *prog,
737 struct brw_stage_state *stage_state,
738 bool for_gather)
739 {
740 if (!prog)
741 return;
742
743 struct gl_context *ctx = &brw->ctx;
744
745 uint32_t *surf_offset = stage_state->surf_offset;
746 if (for_gather)
747 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
748 else
749 surf_offset += stage_state->prog_data->binding_table.texture_start;
750
751 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
752 for (unsigned s = 0; s < num_samplers; s++) {
753 surf_offset[s] = 0;
754
755 if (prog->SamplersUsed & (1 << s)) {
756 const unsigned unit = prog->SamplerUnits[s];
757
758 /* _NEW_TEXTURE */
759 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
760 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
761 }
762 }
763 }
764 }
765
766
767 /**
768 * Construct SURFACE_STATE objects for enabled textures.
769 */
770 static void
771 brw_update_texture_surfaces(struct brw_context *brw)
772 {
773 /* BRW_NEW_VERTEX_PROGRAM */
774 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
775
776 /* BRW_NEW_GEOMETRY_PROGRAM */
777 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
778
779 /* BRW_NEW_FRAGMENT_PROGRAM */
780 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
781
782 /* _NEW_TEXTURE */
783 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
784 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
785 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
786
787 /* emit alternate set of surface state for gather. this
788 * allows the surface format to be overriden for only the
789 * gather4 messages. */
790 if (vs && vs->UsesGather)
791 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
792 if (gs && gs->UsesGather)
793 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
794 if (fs && fs->UsesGather)
795 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
796
797 brw->state.dirty.brw |= BRW_NEW_SURFACES;
798 }
799
800 const struct brw_tracked_state brw_texture_surfaces = {
801 .dirty = {
802 .mesa = _NEW_TEXTURE,
803 .brw = BRW_NEW_BATCH |
804 BRW_NEW_UNIFORM_BUFFER |
805 BRW_NEW_VERTEX_PROGRAM |
806 BRW_NEW_GEOMETRY_PROGRAM |
807 BRW_NEW_FRAGMENT_PROGRAM,
808 .cache = 0
809 },
810 .emit = brw_update_texture_surfaces,
811 };
812
813 void
814 brw_upload_ubo_surfaces(struct brw_context *brw,
815 struct gl_shader *shader,
816 struct brw_stage_state *stage_state,
817 struct brw_stage_prog_data *prog_data)
818 {
819 struct gl_context *ctx = &brw->ctx;
820
821 if (!shader)
822 return;
823
824 uint32_t *surf_offsets =
825 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
826
827 for (int i = 0; i < shader->NumUniformBlocks; i++) {
828 struct gl_uniform_buffer_binding *binding;
829 struct intel_buffer_object *intel_bo;
830
831 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
832 intel_bo = intel_buffer_object(binding->BufferObject);
833 drm_intel_bo *bo =
834 intel_bufferobj_buffer(brw, intel_bo,
835 binding->Offset,
836 binding->BufferObject->Size - binding->Offset);
837
838 /* Because behavior for referencing outside of the binding's size in the
839 * glBindBufferRange case is undefined, we can just bind the whole buffer
840 * glBindBufferBase wants and be a correct implementation.
841 */
842 brw_create_constant_surface(brw, bo, binding->Offset,
843 bo->size - binding->Offset,
844 &surf_offsets[i],
845 shader->Stage == MESA_SHADER_FRAGMENT);
846 }
847
848 if (shader->NumUniformBlocks)
849 brw->state.dirty.brw |= BRW_NEW_SURFACES;
850 }
851
852 static void
853 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
854 {
855 struct gl_context *ctx = &brw->ctx;
856 /* _NEW_PROGRAM */
857 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
858
859 if (!prog)
860 return;
861
862 /* CACHE_NEW_WM_PROG */
863 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
864 &brw->wm.base, &brw->wm.prog_data->base);
865 }
866
867 const struct brw_tracked_state brw_wm_ubo_surfaces = {
868 .dirty = {
869 .mesa = _NEW_PROGRAM,
870 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
871 .cache = CACHE_NEW_WM_PROG,
872 },
873 .emit = brw_upload_wm_ubo_surfaces,
874 };
875
876 void
877 brw_upload_abo_surfaces(struct brw_context *brw,
878 struct gl_shader_program *prog,
879 struct brw_stage_state *stage_state,
880 struct brw_stage_prog_data *prog_data)
881 {
882 struct gl_context *ctx = &brw->ctx;
883 uint32_t *surf_offsets =
884 &stage_state->surf_offset[prog_data->binding_table.abo_start];
885
886 for (int i = 0; i < prog->NumAtomicBuffers; i++) {
887 struct gl_atomic_buffer_binding *binding =
888 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
889 struct intel_buffer_object *intel_bo =
890 intel_buffer_object(binding->BufferObject);
891 drm_intel_bo *bo = intel_bufferobj_buffer(
892 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
893
894 brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
895 bo->size - binding->Offset,
896 &surf_offsets[i], true);
897 }
898
899 if (prog->NumUniformBlocks)
900 brw->state.dirty.brw |= BRW_NEW_SURFACES;
901 }
902
903 static void
904 brw_upload_wm_abo_surfaces(struct brw_context *brw)
905 {
906 struct gl_context *ctx = &brw->ctx;
907 /* _NEW_PROGRAM */
908 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
909
910 if (prog) {
911 /* CACHE_NEW_WM_PROG */
912 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
913 &brw->wm.prog_data->base);
914 }
915 }
916
917 const struct brw_tracked_state brw_wm_abo_surfaces = {
918 .dirty = {
919 .mesa = _NEW_PROGRAM,
920 .brw = BRW_NEW_BATCH | BRW_NEW_ATOMIC_BUFFER,
921 .cache = CACHE_NEW_WM_PROG,
922 },
923 .emit = brw_upload_wm_abo_surfaces,
924 };
925
926 void
927 gen4_init_vtable_surface_functions(struct brw_context *brw)
928 {
929 brw->vtbl.update_texture_surface = brw_update_texture_surface;
930 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
931 brw->vtbl.update_null_renderbuffer_surface =
932 brw_update_null_renderbuffer_surface;
933 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
934 }