i965/miptree: Rework create flags
src/mesa/drivers/dri/i965/brw_context.c
1 /*
2 Copyright 2003 VMware, Inc.
3 Copyright (C) Intel Corp. 2006. All Rights Reserved.
4 Intel funded Tungsten Graphics to
5 develop this 3D driver.
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **********************************************************************/
28 /*
29 * Authors:
30 * Keith Whitwell <keithw@vmware.com>
31 */
32
33
34 #include "compiler/nir/nir.h"
35 #include "main/api_exec.h"
36 #include "main/context.h"
37 #include "main/fbobject.h"
38 #include "main/extensions.h"
39 #include "main/imports.h"
40 #include "main/macros.h"
41 #include "main/points.h"
42 #include "main/version.h"
43 #include "main/vtxfmt.h"
44 #include "main/texobj.h"
45 #include "main/framebuffer.h"
46 #include "main/stencil.h"
47 #include "main/state.h"
48
49 #include "vbo/vbo_context.h"
50
51 #include "drivers/common/driverfuncs.h"
52 #include "drivers/common/meta.h"
53 #include "utils.h"
54
55 #include "brw_context.h"
56 #include "brw_defines.h"
57 #include "brw_blorp.h"
58 #include "brw_draw.h"
59 #include "brw_state.h"
60
61 #include "intel_batchbuffer.h"
62 #include "intel_buffer_objects.h"
63 #include "intel_buffers.h"
64 #include "intel_fbo.h"
65 #include "intel_mipmap_tree.h"
66 #include "intel_pixel.h"
67 #include "intel_image.h"
68 #include "intel_tex.h"
69 #include "intel_tex_obj.h"
70
71 #include "swrast_setup/swrast_setup.h"
72 #include "tnl/tnl.h"
73 #include "tnl/t_pipeline.h"
74 #include "util/ralloc.h"
75 #include "util/debug.h"
76 #include "isl/isl.h"
77
78 /***************************************
79 * Mesa's Driver Functions
80 ***************************************/
81
82 const char *const brw_vendor_string = "Intel Open Source Technology Center";
83
84 static const char *
85 get_bsw_model(const struct intel_screen *screen)
86 {
87 switch (screen->eu_total) {
88 case 16:
89 return "405";
90 case 12:
91 return "400";
92 default:
93 return " ";
94 }
95 }
96
97 const char *
98 brw_get_renderer_string(const struct intel_screen *screen)
99 {
100 const char *chipset;
101 static char buffer[128];
102 char *bsw = NULL;
103
104 switch (screen->deviceID) {
105 #undef CHIPSET
106 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
107 #include "pci_ids/i965_pci_ids.h"
108 default:
109 chipset = "Unknown Intel Chipset";
110 break;
111 }
112
113 /* Braswell branding is funny, so we have to fix it up here */
114 if (screen->deviceID == 0x22B1) {
115 bsw = strdup(chipset);
116 char *needle = strstr(bsw, "XXX");
117 if (needle) {
118 memcpy(needle, get_bsw_model(screen), 3);
119 chipset = bsw;
120 }
121 }
122
123 (void) driGetRendererString(buffer, chipset, 0);
124 free(bsw);
125 return buffer;
126 }
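/* Illustration of the X-macro pattern used above (not driver code): each
 * line of pci_ids/i965_pci_ids.h is a CHIPSET(id, symbol, str) entry, so a
 * hypothetical entry such as
 *
 *    CHIPSET(0x0412, hsw_gt2, "Intel(R) Haswell Desktop")
 *
 * would expand inside the switch to:
 *
 *    case 0x0412: chipset = "Intel(R) Haswell Desktop"; break;
 */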
127
128 static const GLubyte *
129 intel_get_string(struct gl_context * ctx, GLenum name)
130 {
131 const struct brw_context *const brw = brw_context(ctx);
132
133 switch (name) {
134 case GL_VENDOR:
135 return (GLubyte *) brw_vendor_string;
136
137 case GL_RENDERER:
138 return
139 (GLubyte *) brw_get_renderer_string(brw->screen);
140
141 default:
142 return NULL;
143 }
144 }
145
146 static void
147 intel_viewport(struct gl_context *ctx)
148 {
149 struct brw_context *brw = brw_context(ctx);
150 __DRIcontext *driContext = brw->driContext;
151
152 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
153 if (driContext->driDrawablePriv)
154 dri2InvalidateDrawable(driContext->driDrawablePriv);
155 if (driContext->driReadablePriv)
156 dri2InvalidateDrawable(driContext->driReadablePriv);
157 }
158 }
159
160 static void
161 intel_update_framebuffer(struct gl_context *ctx,
162 struct gl_framebuffer *fb)
163 {
164 struct brw_context *brw = brw_context(ctx);
165
166 /* Quantize the derived default number of samples
167 */
168 fb->DefaultGeometry._NumSamples =
169 intel_quantize_num_samples(brw->screen,
170 fb->DefaultGeometry.NumSamples);
171 }
172
173 static void
174 intel_update_state(struct gl_context * ctx)
175 {
176 GLuint new_state = ctx->NewState;
177 struct brw_context *brw = brw_context(ctx);
178
179 if (ctx->swrast_context)
180 _swrast_InvalidateState(ctx, new_state);
181
182 brw->NewGLState |= new_state;
183
184 if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
185 _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
186
187 if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
188 brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
189 brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
190 brw->stencil_write_enabled =
191 _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
192 }
193
194 if (new_state & _NEW_POLYGON)
195 brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
196
197 if (new_state & _NEW_BUFFERS) {
198 intel_update_framebuffer(ctx, ctx->DrawBuffer);
199 if (ctx->DrawBuffer != ctx->ReadBuffer)
200 intel_update_framebuffer(ctx, ctx->ReadBuffer);
201 }
202 }
203
204 #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
205
206 static void
207 intel_flush_front(struct gl_context *ctx)
208 {
209 struct brw_context *brw = brw_context(ctx);
210 __DRIcontext *driContext = brw->driContext;
211 __DRIdrawable *driDrawable = driContext->driDrawablePriv;
212 __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
213
214 if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
215 if (flushFront(dri_screen) && driDrawable &&
216 driDrawable->loaderPrivate) {
217
218 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
219 *
220 * This potentially resolves both the front and back buffers. It
221 * is unnecessary to resolve the back, but harms nothing except
222 * performance. And no one cares about front-buffer render
223 * performance.
224 */
225 intel_resolve_for_dri2_flush(brw, driDrawable);
226 intel_batchbuffer_flush(brw);
227
228 flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
229
230 /* We set the dirty bit in intel_prepare_render() if we're
231 * front buffer rendering once we get there.
232 */
233 brw->front_buffer_dirty = false;
234 }
235 }
236 }
237
238 static void
239 intel_glFlush(struct gl_context *ctx)
240 {
241 struct brw_context *brw = brw_context(ctx);
242
243 intel_batchbuffer_flush(brw);
244 intel_flush_front(ctx);
245
246 brw->need_flush_throttle = true;
247 }
248
249 static void
250 intel_finish(struct gl_context * ctx)
251 {
252 struct brw_context *brw = brw_context(ctx);
253
254 intel_glFlush(ctx);
255
256 if (brw->batch.last_bo)
257 brw_bo_wait_rendering(brw->batch.last_bo);
258 }
259
260 static void
261 brw_init_driver_functions(struct brw_context *brw,
262 struct dd_function_table *functions)
263 {
264 _mesa_init_driver_functions(functions);
265
266 /* GLX uses DRI2 invalidate events to handle window resizing.
267 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
268 * which doesn't provide a mechanism for snooping the event queues.
269 *
270 * So EGL still relies on viewport hacks to handle window resizing.
271 * This should go away with DRI3000.
272 */
273 if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
274 functions->Viewport = intel_viewport;
275
276 functions->Flush = intel_glFlush;
277 functions->Finish = intel_finish;
278 functions->GetString = intel_get_string;
279 functions->UpdateState = intel_update_state;
280
281 intelInitTextureFuncs(functions);
282 intelInitTextureImageFuncs(functions);
283 intelInitTextureSubImageFuncs(functions);
284 intelInitTextureCopyImageFuncs(functions);
285 intelInitCopyImageFuncs(functions);
286 intelInitClearFuncs(functions);
287 intelInitBufferFuncs(functions);
288 intelInitPixelFuncs(functions);
289 intelInitBufferObjectFuncs(functions);
290 brw_init_syncobj_functions(functions);
291 brw_init_object_purgeable_functions(functions);
292
293 brwInitFragProgFuncs( functions );
294 brw_init_common_queryobj_functions(functions);
295 if (brw->gen >= 8 || brw->is_haswell)
296 hsw_init_queryobj_functions(functions);
297 else if (brw->gen >= 6)
298 gen6_init_queryobj_functions(functions);
299 else
300 gen4_init_queryobj_functions(functions);
301 brw_init_compute_functions(functions);
302 brw_init_conditional_render_functions(functions);
303
304 functions->QueryInternalFormat = brw_query_internal_format;
305
306 functions->NewTransformFeedback = brw_new_transform_feedback;
307 functions->DeleteTransformFeedback = brw_delete_transform_feedback;
308 if (can_do_mi_math_and_lrr(brw->screen)) {
309 functions->BeginTransformFeedback = hsw_begin_transform_feedback;
310 functions->EndTransformFeedback = hsw_end_transform_feedback;
311 functions->PauseTransformFeedback = hsw_pause_transform_feedback;
312 functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
313 } else if (brw->gen >= 7) {
314 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
315 functions->EndTransformFeedback = gen7_end_transform_feedback;
316 functions->PauseTransformFeedback = gen7_pause_transform_feedback;
317 functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
318 functions->GetTransformFeedbackVertexCount =
319 brw_get_transform_feedback_vertex_count;
320 } else {
321 functions->BeginTransformFeedback = brw_begin_transform_feedback;
322 functions->EndTransformFeedback = brw_end_transform_feedback;
323 functions->PauseTransformFeedback = brw_pause_transform_feedback;
324 functions->ResumeTransformFeedback = brw_resume_transform_feedback;
325 functions->GetTransformFeedbackVertexCount =
326 brw_get_transform_feedback_vertex_count;
327 }
328
329 if (brw->gen >= 6)
330 functions->GetSamplePosition = gen6_get_sample_position;
331 }
332
333 static void
334 brw_initialize_context_constants(struct brw_context *brw)
335 {
336 struct gl_context *ctx = &brw->ctx;
337 const struct brw_compiler *compiler = brw->screen->compiler;
338
339 const bool stage_exists[MESA_SHADER_STAGES] = {
340 [MESA_SHADER_VERTEX] = true,
341 [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
342 [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
343 [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
344 [MESA_SHADER_FRAGMENT] = true,
345 [MESA_SHADER_COMPUTE] =
346 ((ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGL_CORE) &&
347 ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
348 (ctx->API == API_OPENGLES2 &&
349 ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
350 _mesa_extension_override_enables.ARB_compute_shader,
351 };
352
353 unsigned num_stages = 0;
354 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
355 if (stage_exists[i])
356 num_stages++;
357 }
358
359 unsigned max_samplers =
360 brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
361
362 ctx->Const.MaxDualSourceDrawBuffers = 1;
363 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
364 ctx->Const.MaxCombinedShaderOutputResources =
365 MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
366
367 /* The timestamp register we can read for glGetTimestamp() is
368 * sometimes only 32 bits, before scaling to nanoseconds (depending
369 * on kernel).
370 *
371 * Once scaled to nanoseconds the timestamp would roll over at a
372 * non-power-of-two, so an application couldn't use
373 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
374 * report 36 bits and truncate at that (rolling over 5 times as
375 * often as the HW counter), and when the 32-bit counter rolls
376 * over, it happens to also be at a rollover in the reported value
377 * from near (1<<36) to 0.
378 *
379 * The low 32 bits roll over in ~343 seconds. Our 36-bit result
380 * rolls over every ~69 seconds.
381 */
382 ctx->Const.QueryCounterBits.Timestamp = 36;
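/* Worked example of the numbers above, assuming the ~80 ns HW tick implied
 * by the 343-second figure: the raw 32-bit counter wraps after
 * 2^32 * 80 ns ~= 343 s, while the reported 36-bit nanosecond value wraps
 * after 2^36 ns ~= 68.7 s -- five reported rollovers per HW rollover.
 */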
383
384 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
385 ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
386 if (brw->gen >= 7) {
387 ctx->Const.MaxRenderbufferSize = 16384;
388 ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS);
389 ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
390 } else {
391 ctx->Const.MaxRenderbufferSize = 8192;
392 ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
393 ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
394 }
395 ctx->Const.Max3DTextureLevels = 12; /* 2048 */
396 ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
397 ctx->Const.MaxTextureMbytes = 1536;
398 ctx->Const.MaxTextureRectSize = brw->gen >= 7 ? 16384 : 8192;
399 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
400 ctx->Const.MaxTextureLodBias = 15.0;
401 ctx->Const.StripTextureBorder = true;
402 if (brw->gen >= 7) {
403 ctx->Const.MaxProgramTextureGatherComponents = 4;
404 ctx->Const.MinProgramTextureGatherOffset = -32;
405 ctx->Const.MaxProgramTextureGatherOffset = 31;
406 } else if (brw->gen == 6) {
407 ctx->Const.MaxProgramTextureGatherComponents = 1;
408 ctx->Const.MinProgramTextureGatherOffset = -8;
409 ctx->Const.MaxProgramTextureGatherOffset = 7;
410 }
411
412 ctx->Const.MaxUniformBlockSize = 65536;
413
414 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
415 struct gl_program_constants *prog = &ctx->Const.Program[i];
416
417 if (!stage_exists[i])
418 continue;
419
420 prog->MaxTextureImageUnits = max_samplers;
421
422 prog->MaxUniformBlocks = BRW_MAX_UBO;
423 prog->MaxCombinedUniformComponents =
424 prog->MaxUniformComponents +
425 ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
426
427 prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
428 prog->MaxAtomicBuffers = BRW_MAX_ABO;
429 prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
430 prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
431 }
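/* Note on the MaxCombinedUniformComponents computation in the loop above:
 * MaxUniformBlockSize is in bytes, and dividing by 4 converts bytes to
 * 4-byte (float/int) components, so each UBO contributes
 * 65536 / 4 = 16384 components on top of the default uniform block.
 */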
432
433 ctx->Const.MaxTextureUnits =
434 MIN2(ctx->Const.MaxTextureCoordUnits,
435 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
436
437 ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
438 ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
439 ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
440 ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
441 ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
442 ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
443 ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
444
445
446 /* Hardware only supports a limited number of transform feedback buffers.
447 * So we need to override the Mesa default (which is based only on software
448 * limits).
449 */
450 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
451
452 /* On Gen6, in the worst case, we use up one binding table entry per
453 * transform feedback component (see comments above the definition of
454 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
455 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
456 * BRW_MAX_SOL_BINDINGS.
457 *
458 * In "separate components" mode, we need to divide this value by
459 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
460 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
461 */
462 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
463 ctx->Const.MaxTransformFeedbackSeparateComponents =
464 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
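/* Worked example, assuming the header's values at the time of writing
 * (BRW_MAX_SOL_BINDINGS = 64, BRW_MAX_SOL_BUFFERS = 4): interleaved mode
 * advertises 64 components, while separate mode advertises 64 / 4 = 16 per
 * buffer, so even four fully-used buffers stay within the 64 bindings.
 */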
465
466 ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
467 !can_do_mi_math_and_lrr(brw->screen);
468
469 int max_samples;
470 const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
471 const int clamp_max_samples =
472 driQueryOptioni(&brw->optionCache, "clamp_max_samples");
473
474 if (clamp_max_samples < 0) {
475 max_samples = msaa_modes[0];
476 } else {
477 /* Select the largest supported MSAA mode that does not exceed
478 * clamp_max_samples.
479 */
480 max_samples = 0;
481 for (int i = 0; msaa_modes[i] != 0; ++i) {
482 if (msaa_modes[i] <= clamp_max_samples) {
483 max_samples = msaa_modes[i];
484 break;
485 }
486 }
487 }
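/* Example (illustrative mode list): intel_supported_msaa_modes() returns
 * modes highest-first, e.g. {8, 4, 0} on Gen7, so with
 * clamp_max_samples = 6 the loop above skips 8 and selects 4.
 */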
488
489 ctx->Const.MaxSamples = max_samples;
490 ctx->Const.MaxColorTextureSamples = max_samples;
491 ctx->Const.MaxDepthTextureSamples = max_samples;
492 ctx->Const.MaxIntegerSamples = max_samples;
493 ctx->Const.MaxImageSamples = 0;
494
495 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
496 * to map indices of rectangular grid to sample numbers within a pixel.
497 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
498 * extension implementation. For more details see the comment above
499 * gen6_set_sample_maps() definition.
500 */
501 gen6_set_sample_maps(ctx);
502
503 ctx->Const.MinLineWidth = 1.0;
504 ctx->Const.MinLineWidthAA = 1.0;
505 if (brw->gen >= 6) {
506 ctx->Const.MaxLineWidth = 7.375;
507 ctx->Const.MaxLineWidthAA = 7.375;
508 ctx->Const.LineWidthGranularity = 0.125;
509 } else {
510 ctx->Const.MaxLineWidth = 7.0;
511 ctx->Const.MaxLineWidthAA = 7.0;
512 ctx->Const.LineWidthGranularity = 0.5;
513 }
514
515 /* For non-antialiased lines, we have to round the line width to the
516 * nearest whole number. Make sure that we don't advertise a line
517 * width that, when rounded, will be beyond the actual hardware
518 * maximum.
519 */
520 assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
521
522 ctx->Const.MinPointSize = 1.0;
523 ctx->Const.MinPointSizeAA = 1.0;
524 ctx->Const.MaxPointSize = 255.0;
525 ctx->Const.MaxPointSizeAA = 255.0;
526 ctx->Const.PointSizeGranularity = 1.0;
527
528 if (brw->gen >= 5 || brw->is_g4x)
529 ctx->Const.MaxClipPlanes = 8;
530
531 ctx->Const.GLSLTessLevelsAsInputs = true;
532 ctx->Const.LowerTCSPatchVerticesIn = brw->gen >= 8;
533 ctx->Const.LowerTESPatchVerticesIn = true;
534 ctx->Const.PrimitiveRestartForPatches = true;
535
536 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
537 ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
538 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
539 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
540 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
541 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
542 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
543 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
544 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
545 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
546 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
547 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
548 MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
549 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
550
551 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
552 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
553 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
554 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
555 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
556 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
557 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
558 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
559 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
560 MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
561 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
562
563 /* Fragment shaders use real, 32-bit twos-complement integers for all
564 * integer types.
565 */
566 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
567 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
568 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
569 ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
570 ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
571
572 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
573 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
574 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
575 ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
576 ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
577
578 /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
579 * but we're not sure how vertex order is handled there, which affects
580 * the provoking vertex decision. Always use the last-vertex convention
581 * for quad primitives, which works as expected for now.
582 */
583 if (brw->gen >= 6)
584 ctx->Const.QuadsFollowProvokingVertexConvention = false;
585
586 ctx->Const.NativeIntegers = true;
587 ctx->Const.VertexID_is_zero_based = true;
588
589 /* Regarding the CMP instruction, the Ivybridge PRM says:
590 *
591 * "For each enabled channel 0b or 1b is assigned to the appropriate flag
592 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
593 * 0xFFFFFFFF) is assigned to dst."
594 *
595 * but PRMs for earlier generations say
596 *
597 * "In dword format, one GRF may store up to 8 results. When the register
598 * is used later as a vector of Booleans, as only LSB at each channel
599 * contains meaning [sic] data, software should make sure all higher bits
600 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
601 *
602 * We select the representation of a true boolean uniform to be ~0, and fix
603 * the results of Gen <= 5 CMP instruction's with -(result & 1).
604 */
605 ctx->Const.UniformBooleanTrue = ~0;
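/* Worked example of the Gen <= 5 fixup mentioned above: CMP may leave a
 * "true" channel as 0x00000001 (only the LSB is meaningful), and
 * -(0x00000001 & 1) == 0xFFFFFFFF == ~0, matching UniformBooleanTrue,
 * while -(0 & 1) == 0 remains false.
 */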
606
607 /* From the gen4 PRM, volume 4 page 127:
608 *
609 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
610 * the base address of the first element of the surface, computed in
611 * software by adding the surface base address to the byte offset of
612 * the element in the buffer."
613 *
614 * However, unaligned accesses are slower, so enforce buffer alignment.
615 *
616 * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
617 * restriction: the start of the buffer needs to be 32B aligned.
618 */
619 ctx->Const.UniformBufferOffsetAlignment = 32;
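/* A GL application is expected to honor this limit; a minimal client-side
 * sketch (standard GL API, not driver code):
 *
 *    GLint align;
 *    glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &align);
 *    // Round the sub-buffer offset up to the advertised alignment.
 *    GLintptr offset = (raw_offset + align - 1) / align * align;
 *    glBindBufferRange(GL_UNIFORM_BUFFER, 0, ubo, offset, size);
 */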
620
621 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
622 * that we can safely have the CPU and GPU writing the same SSBO on
623 * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
624 * writes, so there's no problem. For an SSBO, the GPU and the CPU can
625 * be updating disjoint regions of the buffer simultaneously and that will
626 * break if the regions overlap the same cacheline.
627 */
628 ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
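/* Illustration of the hazard described above (a hypothetical scenario, not
 * an observed bug): if two SSBO bindings at byte offsets 0 and 32 shared a
 * 64-byte cacheline, a CPU write-back of that line could clobber a
 * concurrent GPU write to the other half; 64-byte alignment keeps any two
 * bindings in distinct cachelines.
 */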
629 ctx->Const.TextureBufferOffsetAlignment = 16;
630 ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
631
632 if (brw->gen >= 6) {
633 ctx->Const.MaxVarying = 32;
634 ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
635 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
636 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
637 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
638 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
639 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
640 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
641 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
642 }
643
644 /* We want the GLSL compiler to emit code that uses condition codes */
645 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
646 ctx->Const.ShaderCompilerOptions[i] =
647 brw->screen->compiler->glsl_compiler_options[i];
648 }
649
650 if (brw->gen >= 7) {
651 ctx->Const.MaxViewportWidth = 32768;
652 ctx->Const.MaxViewportHeight = 32768;
653 }
654
655 /* ARB_viewport_array, OES_viewport_array */
656 if (brw->gen >= 6) {
657 ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
658 ctx->Const.ViewportSubpixelBits = 0;
659
660 /* Cast to float before negating because MaxViewportWidth is unsigned.
661 */
662 ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
663 ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
664 }
665
666 /* ARB_gpu_shader5 */
667 if (brw->gen >= 7)
668 ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
669
670 /* ARB_framebuffer_no_attachments */
671 ctx->Const.MaxFramebufferWidth = 16384;
672 ctx->Const.MaxFramebufferHeight = 16384;
673 ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
674 ctx->Const.MaxFramebufferSamples = max_samples;
675
676 /* OES_primitive_bounding_box */
677 ctx->Const.NoPrimitiveBoundingBoxOutput = true;
678 }
679
680 static void
681 brw_initialize_cs_context_constants(struct brw_context *brw)
682 {
683 struct gl_context *ctx = &brw->ctx;
684 const struct intel_screen *screen = brw->screen;
685 struct gen_device_info *devinfo = &brw->screen->devinfo;
686
687 /* FINISHME: Do this for all platforms that the kernel supports */
688 if (brw->is_cherryview &&
689 screen->subslice_total > 0 && screen->eu_total > 0) {
690 /* Logical CS threads = EUs per subslice * 7 threads per EU */
691 uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
692
693 /* Fuse configurations may give more threads than expected, never less. */
694 if (max_cs_threads > devinfo->max_cs_threads)
695 devinfo->max_cs_threads = max_cs_threads;
696 }
697
698 /* Maximum number of scalar compute shader invocations that can be run in
699 * parallel in the same subslice assuming SIMD32 dispatch.
700 *
701 * We don't advertise more than 64 threads, because we are limited to 64 by
702 * our usage of thread_width_max in the gpgpu walker command. This only
703 * currently impacts Haswell, which otherwise might be able to advertise 70
704 * threads. With SIMD32 and 64 threads, Haswell still provides twice
705 * the number of invocations required for ARB_compute_shader.
706 */
707 const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
708 const uint32_t max_invocations = 32 * max_threads;
709 ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
710 ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
711 ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
712 ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
713 ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
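/* Worked example for the Haswell case described above: max_cs_threads of
 * 70 is capped to 64, giving 32 * 64 = 2048 invocations per work group,
 * twice the minimum of 1024 that ARB_compute_shader requires.
 */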
714 }
715
716 /**
717 * Process driconf (drirc) options, setting appropriate context flags.
718 *
719 * intelInitExtensions still pokes at optionCache directly, in order to
720 * avoid advertising various extensions. No flags are set, so it makes
721 * sense to continue doing that there.
722 */
723 static void
724 brw_process_driconf_options(struct brw_context *brw)
725 {
726 struct gl_context *ctx = &brw->ctx;
727
728 driOptionCache *options = &brw->optionCache;
729 driParseConfigFiles(options, &brw->screen->optionCache,
730 brw->driContext->driScreenPriv->myNum, "i965");
731
732 int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
733 switch (bo_reuse_mode) {
734 case DRI_CONF_BO_REUSE_DISABLED:
735 break;
736 case DRI_CONF_BO_REUSE_ALL:
737 brw_bufmgr_enable_reuse(brw->bufmgr);
738 break;
739 }
740
741 if (INTEL_DEBUG & DEBUG_NO_HIZ) {
742 brw->has_hiz = false;
743 /* On gen6, you can only do separate stencil with HIZ. */
744 if (brw->gen == 6)
745 brw->has_separate_stencil = false;
746 }
747
748 if (driQueryOptionb(options, "mesa_no_error"))
749 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
750
751 if (driQueryOptionb(options, "always_flush_batch")) {
752 fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
753 brw->always_flush_batch = true;
754 }
755
756 if (driQueryOptionb(options, "always_flush_cache")) {
757 fprintf(stderr, "flushing GPU caches before/after each draw call\n");
758 brw->always_flush_cache = true;
759 }
760
761 if (driQueryOptionb(options, "disable_throttling")) {
762 fprintf(stderr, "disabling flush throttling\n");
763 brw->disable_throttling = true;
764 }
765
766 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
767
768 if (driQueryOptionb(&brw->optionCache, "precise_trig"))
769 brw->screen->compiler->precise_trig = true;
770
771 ctx->Const.ForceGLSLExtensionsWarn =
772 driQueryOptionb(options, "force_glsl_extensions_warn");
773
774 ctx->Const.ForceGLSLVersion =
775 driQueryOptioni(options, "force_glsl_version");
776
777 ctx->Const.DisableGLSLLineContinuations =
778 driQueryOptionb(options, "disable_glsl_line_continuations");
779
780 ctx->Const.AllowGLSLExtensionDirectiveMidShader =
781 driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
782
783 ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
784 driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
785
786 ctx->Const.AllowHigherCompatVersion =
787 driQueryOptionb(options, "allow_higher_compat_version");
788
789 ctx->Const.ForceGLSLAbsSqrt =
790 driQueryOptionb(options, "force_glsl_abs_sqrt");
791
792 ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
793
794 brw->dual_color_blend_by_location =
795 driQueryOptionb(options, "dual_color_blend_by_location");
796 }
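/* A minimal ~/.drirc sketch exercising a few of the options parsed above
 * (illustrative values; the option names come from the driconf
 * definitions):
 *
 *    <driconf>
 *      <device screen="0" driver="i965">
 *        <application name="Default">
 *          <option name="bo_reuse" value="1"/>
 *          <option name="shader_precompile" value="true"/>
 *          <option name="clamp_max_samples" value="4"/>
 *        </application>
 *      </device>
 *    </driconf>
 */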
797
798 GLboolean
799 brwCreateContext(gl_api api,
800 const struct gl_config *mesaVis,
801 __DRIcontext *driContextPriv,
802 unsigned major_version,
803 unsigned minor_version,
804 uint32_t flags,
805 bool notify_reset,
806 unsigned *dri_ctx_error,
807 void *sharedContextPrivate)
808 {
809 struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
810 struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
811 const struct gen_device_info *devinfo = &screen->devinfo;
812 struct dd_function_table functions;
813
814 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
815 * provides us with context reset notifications.
816 */
817 uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
818 __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
819 __DRI_CTX_FLAG_NO_ERROR;
820
821 if (screen->has_context_reset_notification)
822 allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
823
824 if (flags & ~allowed_flags) {
825 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
826 return false;
827 }
828
829 struct brw_context *brw = rzalloc(NULL, struct brw_context);
830 if (!brw) {
831 fprintf(stderr, "%s: failed to alloc context\n", __func__);
832 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
833 return false;
834 }
835
836 driContextPriv->driverPrivate = brw;
837 brw->driContext = driContextPriv;
838 brw->screen = screen;
839 brw->bufmgr = screen->bufmgr;
840
841 brw->gen = devinfo->gen;
842 brw->gt = devinfo->gt;
843 brw->is_g4x = devinfo->is_g4x;
844 brw->is_baytrail = devinfo->is_baytrail;
845 brw->is_haswell = devinfo->is_haswell;
846 brw->is_cherryview = devinfo->is_cherryview;
847 brw->is_broxton = devinfo->is_broxton || devinfo->is_geminilake;
848 brw->has_llc = devinfo->has_llc;
849 brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
850 brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
851 brw->has_pln = devinfo->has_pln;
852 brw->has_compr4 = devinfo->has_compr4;
853 brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
854 brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
855 brw->needs_unlit_centroid_workaround =
856 devinfo->needs_unlit_centroid_workaround;
857
858 brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
859 brw->has_swizzling = screen->hw_has_swizzling;
860
861 brw->isl_dev = screen->isl_dev;
862
863 brw->vs.base.stage = MESA_SHADER_VERTEX;
864 brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
865 brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
866 brw->gs.base.stage = MESA_SHADER_GEOMETRY;
867 brw->wm.base.stage = MESA_SHADER_FRAGMENT;
868 if (brw->gen >= 8) {
869 gen8_init_vtable_surface_functions(brw);
870 brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
871 } else if (brw->gen >= 7) {
872 gen7_init_vtable_surface_functions(brw);
873 brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
874 } else if (brw->gen >= 6) {
875 gen6_init_vtable_surface_functions(brw);
876 brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
877 } else {
878 gen4_init_vtable_surface_functions(brw);
879 brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
880 }
881
882 brw_init_driver_functions(brw, &functions);
883
884 if (notify_reset)
885 functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
886
887 struct gl_context *ctx = &brw->ctx;
888
889 if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
890 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
891 fprintf(stderr, "%s: failed to init mesa context\n", __func__);
892 intelDestroyContext(driContextPriv);
893 return false;
894 }
895
896 driContextSetFlags(ctx, flags);
897
898 /* Initialize the software rasterizer and helper modules.
899 *
900 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
901 * software fallbacks (which we have to support on legacy GL to do weird
902 * glDrawPixels(), glBitmap(), and other functions).
903 */
904 if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
905 _swrast_CreateContext(ctx);
906 }
907
908 _vbo_CreateContext(ctx);
909 if (ctx->swrast_context) {
910 _tnl_CreateContext(ctx);
911 TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
912 _swsetup_CreateContext(ctx);
913
914 /* Configure swrast to match hardware characteristics: */
915 _swrast_allow_pixel_fog(ctx, false);
916 _swrast_allow_vertex_fog(ctx, true);
917 }
918
919 _mesa_meta_init(ctx);
920
921 brw_process_driconf_options(brw);
922
923 if (INTEL_DEBUG & DEBUG_PERF)
924 brw->perf_debug = true;
925
926 brw_initialize_cs_context_constants(brw);
927 brw_initialize_context_constants(brw);
928
929 ctx->Const.ResetStrategy = notify_reset
930 ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
931
932 /* Reinitialize the context point state. It depends on ctx->Const values. */
933 _mesa_init_point(ctx);
934
935 intel_fbo_init(brw);
936
937 intel_batchbuffer_init(&brw->batch, brw->bufmgr, brw->has_llc);
938
939 if (brw->gen >= 6) {
940 /* Create a new hardware context. Using a hardware context means that
941 * our GPU state will be saved/restored on context switch, allowing us
942 * to assume that the GPU is in the same state we left it in.
943 *
944 * This is required for transform feedback buffer offsets, query objects,
945 * and also allows us to reduce how much state we have to emit.
946 */
947 brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
948
949 if (!brw->hw_ctx) {
950 fprintf(stderr, "Failed to create hardware context.\n");
951 intelDestroyContext(driContextPriv);
952 return false;
953 }
954 }
955
956 if (brw_init_pipe_control(brw, devinfo)) {
957 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
958 intelDestroyContext(driContextPriv);
959 return false;
960 }
961
962 brw_init_state(brw);
963
964 intelInitExtensions(ctx);
965
966 brw_init_surface_formats(brw);
967
968 brw_blorp_init(brw);
969
970 brw->urb.size = devinfo->urb.size;
971
972 if (brw->gen == 6)
973 brw->urb.gs_present = false;
974
975 brw->prim_restart.in_progress = false;
976 brw->prim_restart.enable_cut_index = false;
977 brw->gs.enabled = false;
978 brw->clip.viewport_count = 1;
979
980 brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
981
982 brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
983
984 ctx->VertexProgram._MaintainTnlProgram = true;
985 ctx->FragmentProgram._MaintainTexEnvProgram = true;
986
987 brw_draw_init( brw );
988
989 if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
990 /* Turn on some extra GL_ARB_debug_output generation. */
991 brw->perf_debug = true;
992 }
993
994 if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
995 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
996 ctx->Const.RobustAccess = GL_TRUE;
997 }
998
999 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
1000 brw_init_shader_time(brw);
1001
1002 _mesa_compute_version(ctx);
1003
1004 _mesa_initialize_dispatch_tables(ctx);
1005 _mesa_initialize_vbo_vtxfmt(ctx);
1006
1007 if (ctx->Extensions.INTEL_performance_query)
1008 brw_init_performance_queries(brw);
1009
1010 vbo_use_buffer_objects(ctx);
1011 vbo_always_unmap_buffers(ctx);
1012
1013 return true;
1014 }
1015
1016 void
1017 intelDestroyContext(__DRIcontext * driContextPriv)
1018 {
1019 struct brw_context *brw =
1020 (struct brw_context *) driContextPriv->driverPrivate;
1021 struct gl_context *ctx = &brw->ctx;
1022
1023 _mesa_meta_free(&brw->ctx);
1024
1025 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1026 /* Force a report. */
1027 brw->shader_time.report_time = 0;
1028
1029 brw_collect_and_report_shader_time(brw);
1030 brw_destroy_shader_time(brw);
1031 }
1032
1033 if (brw->gen >= 6)
1034 blorp_finish(&brw->blorp);
1035
1036 brw_destroy_state(brw);
1037 brw_draw_destroy(brw);
1038
1039 brw_bo_unreference(brw->curbe.curbe_bo);
1040 if (brw->vs.base.scratch_bo)
1041 brw_bo_unreference(brw->vs.base.scratch_bo);
1042 if (brw->tcs.base.scratch_bo)
1043 brw_bo_unreference(brw->tcs.base.scratch_bo);
1044 if (brw->tes.base.scratch_bo)
1045 brw_bo_unreference(brw->tes.base.scratch_bo);
1046 if (brw->gs.base.scratch_bo)
1047 brw_bo_unreference(brw->gs.base.scratch_bo);
1048 if (brw->wm.base.scratch_bo)
1049 brw_bo_unreference(brw->wm.base.scratch_bo);
1050
1051 brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
1052
1053 if (ctx->swrast_context) {
1054 _swsetup_DestroyContext(&brw->ctx);
1055 _tnl_DestroyContext(&brw->ctx);
1056 }
1057 _vbo_DestroyContext(&brw->ctx);
1058
1059 if (ctx->swrast_context)
1060 _swrast_DestroyContext(&brw->ctx);
1061
1062 brw_fini_pipe_control(brw);
1063 intel_batchbuffer_free(&brw->batch);
1064
1065 brw_bo_unreference(brw->throttle_batch[1]);
1066 brw_bo_unreference(brw->throttle_batch[0]);
1067 brw->throttle_batch[1] = NULL;
1068 brw->throttle_batch[0] = NULL;
1069
1070 driDestroyOptionCache(&brw->optionCache);
1071
1072 /* free the Mesa context */
1073 _mesa_free_context_data(&brw->ctx);
1074
1075 ralloc_free(brw);
1076 driContextPriv->driverPrivate = NULL;
1077 }
1078
1079 GLboolean
1080 intelUnbindContext(__DRIcontext * driContextPriv)
1081 {
1082 /* Unset current context and dispatch table */
1083 _mesa_make_current(NULL, NULL, NULL);
1084
1085 return true;
1086 }
1087
1088 /**
1089 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
1090 * on window system framebuffers.
1091 *
1092 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1093 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1094 * sRGB encode if the renderbuffer can handle it. You can ask specifically
1095 * for a visual where you're guaranteed to be capable, but it turns out that
1096 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1097 * incapable ones, because there's no difference between the two in resources
1098 * used. Applications thus get built that accidentally rely on the default
1099 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
1100 * great...
1101 *
1102 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1103 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1104 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1105 * capable, do sRGB encode". Then, for your window system renderbuffers, you
1106 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1107 * and get no sRGB encode (assuming that both kinds of visual are available).
1108 * Thus our choice to support sRGB by default on our visuals for desktop would
1109 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1110 *
1111 * Unfortunately, renderbuffer setup happens before a context is created. So
1112 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1113 * context (without an sRGB visual, though we don't have sRGB visuals exposed
1114 * yet), we go turn that back off before anyone finds out.
1115 */
1116 static void
1117 intel_gles3_srgb_workaround(struct brw_context *brw,
1118 struct gl_framebuffer *fb)
1119 {
1120 struct gl_context *ctx = &brw->ctx;
1121
1122 if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1123 return;
1124
1125 /* Some day when we support the sRGB capable bit on visuals available for
1126 * GLES, we'll need to respect that and not disable things here.
1127 */
1128 fb->Visual.sRGBCapable = false;
1129 for (int i = 0; i < BUFFER_COUNT; i++) {
1130 struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
1131 if (rb)
1132 rb->Format = _mesa_get_srgb_format_linear(rb->Format);
1133 }
1134 }
1135
1136 GLboolean
1137 intelMakeCurrent(__DRIcontext * driContextPriv,
1138 __DRIdrawable * driDrawPriv,
1139 __DRIdrawable * driReadPriv)
1140 {
1141 struct brw_context *brw;
1142 GET_CURRENT_CONTEXT(curCtx);
1143
1144 if (driContextPriv)
1145 brw = (struct brw_context *) driContextPriv->driverPrivate;
1146 else
1147 brw = NULL;
1148
1149 /* According to the glXMakeCurrent() man page: "Pending commands to
1150 * the previous context, if any, are flushed before it is released."
1151 * But only flush if we're actually changing contexts.
1152 */
1153 if (brw_context(curCtx) && brw_context(curCtx) != brw) {
1154 _mesa_flush(curCtx);
1155 }
1156
1157 if (driContextPriv) {
1158 struct gl_context *ctx = &brw->ctx;
1159 struct gl_framebuffer *fb, *readFb;
1160
1161 if (driDrawPriv == NULL) {
1162 fb = _mesa_get_incomplete_framebuffer();
1163 } else {
1164 fb = driDrawPriv->driverPrivate;
1165 driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1166 }
1167
1168 if (driReadPriv == NULL) {
1169 readFb = _mesa_get_incomplete_framebuffer();
1170 } else {
1171 readFb = driReadPriv->driverPrivate;
1172 driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1173 }
1174
1175 /* The sRGB workaround changes the renderbuffer's format. We must change
1176 * the format before the renderbuffer's miptree gets allocated, otherwise
1177 * the formats of the renderbuffer and its miptree will differ.
1178 */
1179 intel_gles3_srgb_workaround(brw, fb);
1180 intel_gles3_srgb_workaround(brw, readFb);
1181
1182 /* If the context viewport hasn't been initialized, force a call out to
1183 * the loader to get buffers so we have a drawable size for the initial
1184 * viewport. */
1185 if (!brw->ctx.ViewportInitialized)
1186 intel_prepare_render(brw);
1187
1188 _mesa_make_current(ctx, fb, readFb);
1189 } else {
1190 _mesa_make_current(NULL, NULL, NULL);
1191 }
1192
1193 return true;
1194 }
1195
1196 void
1197 intel_resolve_for_dri2_flush(struct brw_context *brw,
1198 __DRIdrawable *drawable)
1199 {
1200 if (brw->gen < 6) {
1201 /* MSAA and fast color clear are not supported, so don't waste time
1202 * checking whether a resolve is needed.
1203 */
1204 return;
1205 }
1206
1207 struct gl_framebuffer *fb = drawable->driverPrivate;
1208 struct intel_renderbuffer *rb;
1209
1210 /* Usually, only the back buffer will need to be downsampled. However,
1211 * the front buffer will also need it if the user has rendered into it.
1212 */
1213 static const gl_buffer_index buffers[2] = {
1214 BUFFER_BACK_LEFT,
1215 BUFFER_FRONT_LEFT,
1216 };
1217
1218 for (int i = 0; i < 2; ++i) {
1219 rb = intel_get_renderbuffer(fb, buffers[i]);
1220 if (rb == NULL || rb->mt == NULL)
1221 continue;
1222 if (rb->mt->surf.samples == 1) {
1223 assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
1224 rb->layer_count == 1);
1225 intel_miptree_prepare_access(brw, rb->mt, 0, 1, 0, 1, false, false);
1226 } else {
1227 intel_renderbuffer_downsample(brw, rb);
1228 }
1229 }
1230 }
1231
1232 static unsigned
1233 intel_bits_per_pixel(const struct intel_renderbuffer *rb)
1234 {
1235 return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
1236 }
1237
1238 static void
1239 intel_query_dri2_buffers(struct brw_context *brw,
1240 __DRIdrawable *drawable,
1241 __DRIbuffer **buffers,
1242 int *count);
1243
1244 static void
1245 intel_process_dri2_buffer(struct brw_context *brw,
1246 __DRIdrawable *drawable,
1247 __DRIbuffer *buffer,
1248 struct intel_renderbuffer *rb,
1249 const char *buffer_name);
1250
1251 static void
1252 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1253
1254 static void
1255 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1256 {
1257 struct gl_framebuffer *fb = drawable->driverPrivate;
1258 struct intel_renderbuffer *rb;
1259 __DRIbuffer *buffers = NULL;
1260 int count;
1261 const char *region_name;
1262
1263 /* Set this up front, so that in case our buffers get invalidated
1264 * while we're getting new buffers, we don't clobber the stamp and
1265 * thus ignore the invalidate. */
1266 drawable->lastStamp = drawable->dri2.stamp;
1267
1268 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1269 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1270
1271 intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1272
1273 if (buffers == NULL)
1274 return;
1275
1276 for (int i = 0; i < count; i++) {
1277 switch (buffers[i].attachment) {
1278 case __DRI_BUFFER_FRONT_LEFT:
1279 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1280 region_name = "dri2 front buffer";
1281 break;
1282
1283 case __DRI_BUFFER_FAKE_FRONT_LEFT:
1284 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1285 region_name = "dri2 fake front buffer";
1286 break;
1287
1288 case __DRI_BUFFER_BACK_LEFT:
1289 rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1290 region_name = "dri2 back buffer";
1291 break;
1292
1293 case __DRI_BUFFER_DEPTH:
1294 case __DRI_BUFFER_HIZ:
1295 case __DRI_BUFFER_DEPTH_STENCIL:
1296 case __DRI_BUFFER_STENCIL:
1297 case __DRI_BUFFER_ACCUM:
1298 default:
1299 fprintf(stderr,
1300 "unhandled buffer attach event, attachment type %d\n",
1301 buffers[i].attachment);
1302 return;
1303 }
1304
1305 intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1306 }
1307
1308 }
1309
1310 void
1311 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1312 {
1313 struct brw_context *brw = context->driverPrivate;
1314 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1315
1316 /* Set this up front, so that in case our buffers get invalidated
1317 * while we're getting new buffers, we don't clobber the stamp and
1318 * thus ignore the invalidate. */
1319 drawable->lastStamp = drawable->dri2.stamp;
1320
1321 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1322 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1323
1324 if (dri_screen->image.loader)
1325 intel_update_image_buffers(brw, drawable);
1326 else
1327 intel_update_dri2_buffers(brw, drawable);
1328
1329 driUpdateFramebufferSize(&brw->ctx, drawable);
1330 }
1331
1332 /**
1333 * intel_prepare_render should be called anywhere that current read/drawbuffer
1334 * state is required.
1335 */
1336 void
1337 intel_prepare_render(struct brw_context *brw)
1338 {
1339 struct gl_context *ctx = &brw->ctx;
1340 __DRIcontext *driContext = brw->driContext;
1341 __DRIdrawable *drawable;
1342
1343 drawable = driContext->driDrawablePriv;
1344 if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1345 if (drawable->lastStamp != drawable->dri2.stamp)
1346 intel_update_renderbuffers(driContext, drawable);
1347 driContext->dri2.draw_stamp = drawable->dri2.stamp;
1348 }
1349
1350 drawable = driContext->driReadablePriv;
1351 if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1352 if (drawable->lastStamp != drawable->dri2.stamp)
1353 intel_update_renderbuffers(driContext, drawable);
1354 driContext->dri2.read_stamp = drawable->dri2.stamp;
1355 }
1356
1357 /* If we're currently rendering to the front buffer, the rendering
1358 * that will happen next will probably dirty the front buffer. So
1359 * mark it as dirty here.
1360 */
1361 if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
1362 brw->front_buffer_dirty = true;
1363 }
1364
1365 /**
1366 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1367 *
1368 * To determine which DRI buffers to request, examine the renderbuffers
1369 * attached to the drawable's framebuffer. Then request the buffers with
1370 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1371 *
1372 * This is called from intel_update_renderbuffers().
1373 *
1374 * \param drawable Drawable whose buffers are queried.
1375 * \param buffers [out] List of buffers returned by DRI2 query.
1376 * \param buffer_count [out] Number of buffers returned.
1377 *
1378 * \see intel_update_renderbuffers()
1379 * \see DRI2GetBuffers()
1380 * \see DRI2GetBuffersWithFormat()
1381 */
1382 static void
1383 intel_query_dri2_buffers(struct brw_context *brw,
1384 __DRIdrawable *drawable,
1385 __DRIbuffer **buffers,
1386 int *buffer_count)
1387 {
1388 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1389 struct gl_framebuffer *fb = drawable->driverPrivate;
1390 int i = 0;
1391 unsigned attachments[8];
1392
1393 struct intel_renderbuffer *front_rb;
1394 struct intel_renderbuffer *back_rb;
1395
1396 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1397 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1398
1399 memset(attachments, 0, sizeof(attachments));
1400 if ((_mesa_is_front_buffer_drawing(fb) ||
1401 _mesa_is_front_buffer_reading(fb) ||
1402 !back_rb) && front_rb) {
1403 /* If a fake front buffer is in use, then querying for
1404 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1405 * the real front buffer to the fake front buffer. So before doing the
1406 * query, we need to make sure all the pending drawing has landed in the
1407 * real front buffer.
1408 */
1409 intel_batchbuffer_flush(brw);
1410 intel_flush_front(&brw->ctx);
1411
1412 attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1413 attachments[i++] = intel_bits_per_pixel(front_rb);
1414 } else if (front_rb && brw->front_buffer_dirty) {
1415 /* We have pending front buffer rendering, but we aren't querying for a
1416 * front buffer. If the front buffer we have is a fake front buffer,
1417 * the X server is going to throw it away when it processes the query.
1418 * So before doing the query, make sure all the pending drawing has
1419 * landed in the real front buffer.
1420 */
1421 intel_batchbuffer_flush(brw);
1422 intel_flush_front(&brw->ctx);
1423 }
1424
1425 if (back_rb) {
1426 attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1427 attachments[i++] = intel_bits_per_pixel(back_rb);
1428 }
1429
1430 assert(i <= ARRAY_SIZE(attachments));
1431
1432 *buffers =
1433 dri_screen->dri2.loader->getBuffersWithFormat(drawable,
1434 &drawable->w,
1435 &drawable->h,
1436 attachments, i / 2,
1437 buffer_count,
1438 drawable->loaderPrivate);
1439 }
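/* Illustration: attachments[] above is packed as (attachment, bpp) pairs,
 * e.g. {__DRI_BUFFER_BACK_LEFT, 32} for a single ARGB8888 back buffer,
 * which is why the request passes i / 2 as the attachment count.
 */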
1440
1441 /**
1442 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1443 *
1444 * This is called from intel_update_renderbuffers().
1445 *
1446 * \par Note:
1447 * DRI buffers whose attachment point is DRI2BufferStencil or
1448 * DRI2BufferDepthStencil are handled as special cases.
1449 *
1450 * \param buffer_name is a human readable name, such as "dri2 front buffer",
1451 * that is passed to brw_bo_gem_create_from_name().
1452 *
1453 * \see intel_update_renderbuffers()
1454 */
1455 static void
1456 intel_process_dri2_buffer(struct brw_context *brw,
1457 __DRIdrawable *drawable,
1458 __DRIbuffer *buffer,
1459 struct intel_renderbuffer *rb,
1460 const char *buffer_name)
1461 {
1462 struct gl_framebuffer *fb = drawable->driverPrivate;
1463 struct brw_bo *bo;
1464
1465 if (!rb)
1466 return;
1467
1468 unsigned num_samples = rb->Base.Base.NumSamples;
1469
1470 /* We try to avoid closing and reopening the same BO name, because the first
1471 * use of a mapping of the buffer involves a bunch of page faulting which is
1472 * moderately expensive.
1473 */
1474 struct intel_mipmap_tree *last_mt;
1475 if (num_samples == 0)
1476 last_mt = rb->mt;
1477 else
1478 last_mt = rb->singlesample_mt;
1479
1480 uint32_t old_name = 0;
1481 if (last_mt) {
1482 /* The bo already has a name because the miptree was created by a
1483 * previous call to intel_process_dri2_buffer(). If a bo already has a
1484 * name, then brw_bo_flink() is a low-cost getter. It does not
1485 * create a new name.
1486 */
1487 brw_bo_flink(last_mt->bo, &old_name);
1488 }
1489
1490 if (old_name == buffer->name)
1491 return;
1492
1493 if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1494 fprintf(stderr,
1495 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1496 buffer->name, buffer->attachment,
1497 buffer->cpp, buffer->pitch);
1498 }
1499
1500 bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1501 buffer->name);
1502 if (!bo) {
1503 fprintf(stderr,
1504 "Failed to open BO for returned DRI2 buffer "
1505 "(%dx%d, %s, named %d).\n"
1506 "This is likely a bug in the X Server that will lead to a "
1507 "crash soon.\n",
1508 drawable->w, drawable->h, buffer_name, buffer->name);
1509 return;
1510 }
1511
1512 struct intel_mipmap_tree *mt =
1513 intel_miptree_create_for_bo(brw,
1514 bo,
1515 intel_rb_format(rb),
1516 0,
1517 drawable->w,
1518 drawable->h,
1519 1,
1520 buffer->pitch,
1521 MIPTREE_CREATE_DEFAULT);
1522 if (!mt) {
1523 brw_bo_unreference(bo);
1524 return;
1525 }
1526
1527 /* We got this BO from X11. We can't assume that we have coherent texture
1528 * access because X may suddenly decide to use it for scan-out which would
1529 * destroy coherency.
1530 */
1531 bo->cache_coherent = false;
1532
1533 if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
1534 drawable->w, drawable->h,
1535 buffer->pitch)) {
1536 brw_bo_unreference(bo);
1537 intel_miptree_release(&mt);
1538 return;
1539 }
1540
1541 if (_mesa_is_front_buffer_drawing(fb) &&
1542 (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1543 buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1544 rb->Base.Base.NumSamples > 1) {
1545 intel_renderbuffer_upsample(brw, rb);
1546 }
1547
1548 assert(rb->mt);
1549
1550 brw_bo_unreference(bo);
1551 }
1552
1553 /**
1554 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1555 *
1556 * To determine which DRI buffers to request, examine the renderbuffers
1557 * attached to the drawable's framebuffer. Then request the buffers from
1558 * the image loader.
1559 *
1560 * This is called from intel_update_renderbuffers().
1561 *
1562 * \param drawable Drawable whose buffers are queried.
1563 * \param buffers [out] List of buffers returned by DRI2 query.
1564 * \param buffer_count [out] Number of buffers returned.
1565 *
1566 * \see intel_update_renderbuffers()
1567 */
1568
1569 static void
1570 intel_update_image_buffer(struct brw_context *intel,
1571 __DRIdrawable *drawable,
1572 struct intel_renderbuffer *rb,
1573 __DRIimage *buffer,
1574 enum __DRIimageBufferMask buffer_type)
1575 {
1576 struct gl_framebuffer *fb = drawable->driverPrivate;
1577
1578 if (!rb || !buffer->bo)
1579 return;
1580
1581 unsigned num_samples = rb->Base.Base.NumSamples;
1582
1583 /* Check and see if we're already bound to the right
1584 * buffer object
1585 */
1586 struct intel_mipmap_tree *last_mt;
1587 if (num_samples == 0)
1588 last_mt = rb->mt;
1589 else
1590 last_mt = rb->singlesample_mt;
1591
1592 if (last_mt && last_mt->bo == buffer->bo)
1593 return;
1594
1595 enum isl_colorspace colorspace;
1596 switch (_mesa_get_format_color_encoding(intel_rb_format(rb))) {
1597 case GL_SRGB:
1598 colorspace = ISL_COLORSPACE_SRGB;
1599 break;
1600 case GL_LINEAR:
1601 colorspace = ISL_COLORSPACE_LINEAR;
1602 break;
1603 default:
1604 unreachable("Invalid color encoding");
1605 }
1606
1607 struct intel_mipmap_tree *mt =
1608 intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
1609 colorspace, true);
1610 if (!mt)
1611 return;
1612
1613 if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
1614 buffer->width, buffer->height,
1615 buffer->pitch)) {
1616 intel_miptree_release(&mt);
1617 return;
1618 }
1619
1620 if (_mesa_is_front_buffer_drawing(fb) &&
1621 buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1622 rb->Base.Base.NumSamples > 1) {
1623 intel_renderbuffer_upsample(intel, rb);
1624 }
1625 }
1626
1627 static void
1628 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1629 {
1630 struct gl_framebuffer *fb = drawable->driverPrivate;
1631 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1632 struct intel_renderbuffer *front_rb;
1633 struct intel_renderbuffer *back_rb;
1634 struct __DRIimageList images;
1635 mesa_format format;
1636 uint32_t buffer_mask = 0;
1637 int ret;
1638
1639 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1640 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1641
1642 if (back_rb)
1643 format = intel_rb_format(back_rb);
1644 else if (front_rb)
1645 format = intel_rb_format(front_rb);
1646 else
1647 return;
1648
1649 if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1650 _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1651 buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1652 }
1653
1654 if (back_rb)
1655 buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1656
1657 ret = dri_screen->image.loader->getBuffers(drawable,
1658 driGLFormatToImageFormat(format),
1659 &drawable->dri2.stamp,
1660 drawable->loaderPrivate,
1661 buffer_mask,
1662 &images);
1663 if (!ret)
1664 return;
1665
1666 if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1667 drawable->w = images.front->width;
1668 drawable->h = images.front->height;
1669 intel_update_image_buffer(brw,
1670 drawable,
1671 front_rb,
1672 images.front,
1673 __DRI_IMAGE_BUFFER_FRONT);
1674 }
1675
1676 if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1677 drawable->w = images.back->width;
1678 drawable->h = images.back->height;
1679 intel_update_image_buffer(brw,
1680 drawable,
1681 back_rb,
1682 images.back,
1683 __DRI_IMAGE_BUFFER_BACK);
1684 }
1685 }