i965: Implement ARB_query_buffer_object for HSW+
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

const char *const brw_vendor_string = "Intel Open Source Technology Center";

static const char *
get_bsw_model(const struct intel_screen *intelScreen)
{
   switch (intelScreen->eu_total) {
   case 16:
      return "405";
   case 12:
      return "400";
   default:
      return " ";
   }
}

const char *
brw_get_renderer_string(const struct intel_screen *intelScreen)
{
   const char *chipset;
   static char buffer[128];
   char *bsw = NULL;

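   /* Generate the chipset-name switch arms from the shared PCI ID table:
    * pci_ids/i965_pci_ids.h expands the CHIPSET() X-macro once per known
    * device ID, so each table entry becomes a "case id: chipset = str;
    * break;" arm and new hardware only needs a new table entry.
    */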
   switch (intelScreen->deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   /* Braswell branding is funny, so we have to fix it up here */
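   /* The pci_ids string for this device contains a literal "XXX"
    * placeholder (e.g. "Intel(R) HD Graphics XXX (Braswell)"), which is
    * overwritten below with the model number derived from the EU count.
    */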
   if (intelScreen->deviceID == 0x22B1) {
      bsw = strdup(chipset);
      char *needle = strstr(bsw, "XXX");
      if (needle) {
         memcpy(needle, get_bsw_model(intelScreen), 3);
         chipset = bsw;
      }
   }

   (void) driGetRendererString(buffer, chipset, 0);
   free(bsw);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (driContext->driDrawablePriv)
         dri2InvalidateDrawable(driContext->driDrawablePriv);
      if (driContext->driReadablePriv)
         dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_framebuffer(struct gl_context *ctx,
                         struct gl_framebuffer *fb)
{
   struct brw_context *brw = brw_context(ctx);

   /* Quantize the derived default number of samples
    */
   fb->DefaultGeometry._NumSamples =
      intel_quantize_num_samples(brw->intelScreen,
                                 fb->DefaultGeometry.NumSamples);
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      /* The sampling engine understands lossless compression, so resolving
       * such surfaces should be skipped for performance reasons.
       */
      intel_miptree_resolve_color(brw, tex_obj->mt,
                                  INTEL_MIPTREE_IGNORE_CCS_E);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   /* Resolve color for each active shader image. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
         ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;

      if (unlikely(shader && shader->NumImages)) {
         for (unsigned j = 0; j < shader->NumImages; j++) {
            struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
            tex_obj = intel_texture_object(u->TexObj);

            if (tex_obj && tex_obj->mt) {
               /* Access to images is implemented using indirect messages
                * against the data port.  Normal render target writes
                * understand lossless compression, but unfortunately the
                * typed/untyped read/write interface doesn't.  Therefore,
                * compressed surfaces need to be resolved prior to accessing
                * them.
                */
               intel_miptree_resolve_color(brw, tex_obj->mt, 0);
               brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
            }
         }
      }
   }

   /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
    * single-sampled color renderbuffers because the CCS buffer isn't
    * supported for SRGB formats.  This only matters if FRAMEBUFFER_SRGB is
    * enabled because otherwise the surface state will be programmed with the
    * linear equivalent format anyway.
    */
   if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
      struct gl_framebuffer *fb = ctx->DrawBuffer;
      for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (rb == NULL)
            continue;

         struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         struct intel_mipmap_tree *mt = irb->mt;

         if (mt == NULL ||
             mt->num_samples > 1 ||
             _mesa_get_srgb_format_linear(mt->format) == mt->format)
            continue;

         /* Lossless compression is not supported for SRGB formats; it
          * should be impossible to get here with such surfaces.
          */
         assert(!intel_miptree_is_lossless_compressed(brw, mt));
         intel_miptree_resolve_color(brw, mt, 0);
         brw_render_cache_set_check_flush(brw, mt->bo);
      }
   }

   _mesa_lock_context_textures(ctx);

   if (new_state & _NEW_BUFFERS) {
      intel_update_framebuffer(ctx, ctx->DrawBuffer);
      if (ctx->DrawBuffer != ctx->ReadBuffer)
         intel_update_framebuffer(ctx, ctx->ReadBuffer);
   }
}

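/* Pick the front-buffer flush hook from whichever loader interface this
 * screen was initialized with: the newer image loader if present, otherwise
 * the classic DRI2 loader.
 */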
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer. It
          * is unnecessary to resolve the back, but harms nothing except
          * performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
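   /* Query support is layered by generation.  The HSW+ path can use MI_MATH
    * to operate on query results from the command streamer, which is what
    * the ARB_query_buffer_object support this commit adds builds on.
    */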
   if (brw->gen >= 8 || brw->is_haswell)
      hsw_init_queryobj_functions(functions);
   else if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QueryInternalFormat = brw_query_internal_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->intelScreen->compiler;

   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         (ctx->API == API_OPENGL_CORE &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
         _mesa_extension_override_enables.ARB_compute_shader,
   };
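
   /* The MESA_SHADER_COMPUTE entry above only enables compute when the
    * work-group limits initialized earlier by
    * brw_initialize_cs_context_constants() meet the API minimums (1024
    * invocations for desktop GL, 128 for ES 3.1), or when an environment
    * override forces ARB_compute_shader on.
    */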

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

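   /* Timestamp queries are advertised with 36 bits of precision; the GPU's
    * TIMESTAMP counter is treated as wrapping beyond that.
    */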
   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   ctx->Const.MaxRenderbufferSize = 8192;
   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = 1 << 12;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
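      /* Combined components = the default uniform block's components plus
       * UBO storage; MaxUniformBlockSize is in bytes, so dividing by 4
       * converts each block into a count of 32-bit components.
       */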
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;


   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.LowerTessLevel = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the conversion orders the vertices, which
    * affects the provoking-vertex decision.  Always use the last-vertex
    * convention for quad primitives, which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;
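   /* e.g. a Gen <= 5 CMP result of 0x00000001 becomes -(0x00000001 & 1) ==
    * 0xFFFFFFFF, matching the ~0 "true" representation chosen above.
    */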

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *     the base address of the first element of the surface, computed in
    *     software by adding the surface base address to the byte offset of
    *     the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs).  With UBOs, the GPU never
    * writes, so there's no problem.  For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   if (brw->gen >= 7) {
      ctx->Const.MaxViewportWidth = 32768;
      ctx->Const.MaxViewportHeight = 32768;
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = 16384;
   ctx->Const.MaxFramebufferHeight = 16384;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

static void
brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
{
   struct gl_context *ctx = &brw->ctx;

   /* For ES, we set these constants based on SIMD8.
    *
    * TODO: Once we can always generate SIMD16, we should update this.
    *
    * For GL, we assume we can generate a SIMD16 program, but this currently
    * is not always true.  This allows us to run more test cases, and will be
    * required based on desktop GL compute shader requirements.
    */
   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;

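   /* A worked example, assuming a hypothetical part with 64 CS threads: a
    * core-profile context would advertise 16 * 64 = 1024 invocations per
    * work group, while an ES context would advertise 8 * 64 = 512.
    */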
   const uint32_t max_invocations = simd_size * max_threads;
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");
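
   /* The options below come from drirc; e.g. a user could force batch
    * flushing for one application with something like:
    *
    *   <device driver="i965">
    *     <application executable="foo">
    *       <option name="always_flush_batch" value="true" />
    *     </application>
    *   </device>
    *
    * (an illustrative snippet; see the driconf documentation for the exact
    * schema).
    */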

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HIZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");

   brw->dual_color_blend_by_location =
      driQueryOptionb(options, "dual_color_blend_by_location");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   /* FINISHME: Do this for all platforms that the kernel supports */
   if (brw->is_cherryview &&
       screen->subslice_total > 0 && screen->eu_total > 0) {
      /* Logical CS threads = EUs per subslice * 7 threads per EU */
      brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;
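
      /* For example, a hypothetical fused-down part with 16 EUs across 2
       * subslices would get 16 / 2 * 7 = 56 logical compute threads here.
       */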

      /* Fuse configurations may give more threads than expected, never less. */
      if (brw->max_cs_threads < devinfo->max_cs_threads)
         brw->max_cs_threads = devinfo->max_cs_threads;
   } else {
      brw->max_cs_threads = devinfo->max_cs_threads;
   }
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       env_var_as_boolean("INTEL_USE_GATHER", false));

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
      if (rb)
         rb->Format = _mesa_get_srgb_format_linear(rb->Format);
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

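      /* Setting draw_stamp/read_stamp to stamp - 1 below marks each drawable
       * as out of date, so the next intel_prepare_render() will ask the
       * loader for fresh buffers.
       */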
      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format.  We must
       * change the format before the renderbuffer's miptree gets allocated,
       * otherwise the formats of the renderbuffer and its miptree will
       * differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport.
       */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt, 0);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
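   /* attachments[] is filled with (attachment token, bits-per-pixel) pairs,
    * which is why the request count passed to the loader below is i / 2.
    */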
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer.  So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer.  If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer().  If a bo already has a
       * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
       * create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by the query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}