740a33ae4d5bd15432d88cd4f9db78f2ed7aacc6
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
1 /*
2 Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
3 Copyright (C) Intel Corp. 2006. All Rights Reserved.
4 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
5 develop this 3D driver.
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **********************************************************************/
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 */
32
33
34 #include "main/api_exec.h"
35 #include "main/context.h"
36 #include "main/fbobject.h"
37 #include "main/imports.h"
38 #include "main/macros.h"
39 #include "main/points.h"
40 #include "main/simple_list.h"
41 #include "main/version.h"
42 #include "main/vtxfmt.h"
43
44 #include "vbo/vbo_context.h"
45
46 #include "drivers/common/driverfuncs.h"
47 #include "drivers/common/meta.h"
48 #include "utils.h"
49
50 #include "brw_context.h"
51 #include "brw_defines.h"
52 #include "brw_draw.h"
53 #include "brw_state.h"
54
55 #include "intel_batchbuffer.h"
56 #include "intel_buffer_objects.h"
57 #include "intel_buffers.h"
58 #include "intel_fbo.h"
59 #include "intel_mipmap_tree.h"
60 #include "intel_pixel.h"
61 #include "intel_regions.h"
62 #include "intel_tex.h"
63 #include "intel_tex_obj.h"
64
65 #include "swrast_setup/swrast_setup.h"
66 #include "tnl/tnl.h"
67 #include "tnl/t_pipeline.h"
68 #include "glsl/ralloc.h"
69
70 /***************************************
71 * Mesa's Driver Functions
72 ***************************************/
73
74 static size_t
75 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
76 GLenum internalFormat, int samples[16])
77 {
78 struct brw_context *brw = brw_context(ctx);
79
80 (void) target;
81
82 switch (brw->gen) {
83 case 7:
84 samples[0] = 8;
85 samples[1] = 4;
86 return 2;
87
88 case 6:
89 samples[0] = 4;
90 return 1;
91
92 default:
93 samples[0] = 1;
94 return 1;
95 }
96 }
97
/* Return the driver's identification strings for glGetString().
 *
 * For GL_RENDERER the chipset name is looked up by PCI device ID and
 * formatted into a static buffer, so the returned pointer is only valid
 * until the next GL_RENDERER query (contexts are not expected to call
 * this concurrently).  Returns NULL for names this driver does not
 * handle, letting core Mesa supply them.
 */
static const GLubyte *
intelGetString(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);
   const char *chipset;
   static char buffer[128];

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) "Intel Open Source Technology Center";
      break;

   case GL_RENDERER:
      switch (brw->intelScreen->deviceID) {
      /* Expand the shared PCI-ID table into "case id: chipset = str;"
       * entries, one per known device.
       */
#undef CHIPSET
#define CHIPSET(id, family, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
      default:
         chipset = "Unknown Intel Chipset";
         break;
      }

      /* Combine the chipset name with common driver identification. */
      (void) driGetRendererString(buffer, chipset, 0);
      return (GLubyte *) buffer;

   default:
      return NULL;
   }
}
127
128 static void
129 intel_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
130 {
131 struct brw_context *brw = brw_context(ctx);
132 __DRIcontext *driContext = brw->driContext;
133
134 if (brw->saved_viewport)
135 brw->saved_viewport(ctx, x, y, w, h);
136
137 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
138 dri2InvalidateDrawable(driContext->driDrawablePriv);
139 dri2InvalidateDrawable(driContext->driReadablePriv);
140 }
141 }
142
143 static void
144 intelInvalidateState(struct gl_context * ctx, GLuint new_state)
145 {
146 struct brw_context *brw = brw_context(ctx);
147
148 if (ctx->swrast_context)
149 _swrast_InvalidateState(ctx, new_state);
150 _vbo_InvalidateState(ctx, new_state);
151
152 brw->NewGLState |= new_state;
153 }
154
/* Push fake-front-buffer contents to the real front buffer.
 *
 * Only acts when front-buffer rendering has dirtied the front buffer and
 * the draw buffer is a window-system framebuffer.  The copy itself is
 * performed by the DRI2 loader's flushFrontBuffer() callback; we resolve
 * and submit our rendering first so the loader sees finished pixels.
 */
static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      /* The loader may not implement flushFrontBuffer, and the drawable
       * (or its loader-side handle) may already be gone.
       */
      if (screen->dri2.loader->flushFrontBuffer != NULL &&
          driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer. It
          * is unnecessary to resolve the back, but harms nothing except
          * performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         screen->dri2.loader->flushFrontBuffer(driDrawable,
                                               driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}
188
189 static void
190 intel_glFlush(struct gl_context *ctx)
191 {
192 struct brw_context *brw = brw_context(ctx);
193
194 intel_batchbuffer_flush(brw);
195 intel_flush_front(ctx);
196 if (brw->is_front_buffer_rendering)
197 brw->need_throttle = true;
198 }
199
200 void
201 intelFinish(struct gl_context * ctx)
202 {
203 struct brw_context *brw = brw_context(ctx);
204
205 intel_glFlush(ctx);
206
207 if (brw->batch.last_bo)
208 drm_intel_bo_wait_rendering(brw->batch.last_bo);
209 }
210
211 static void
212 brwInitDriverFunctions(struct intel_screen *screen,
213 struct dd_function_table *functions)
214 {
215 _mesa_init_driver_functions(functions);
216
217 functions->Flush = intel_glFlush;
218 functions->Finish = intelFinish;
219 functions->GetString = intelGetString;
220 functions->UpdateState = intelInvalidateState;
221
222 intelInitTextureFuncs(functions);
223 intelInitTextureImageFuncs(functions);
224 intelInitTextureSubImageFuncs(functions);
225 intelInitTextureCopyImageFuncs(functions);
226 intelInitClearFuncs(functions);
227 intelInitBufferFuncs(functions);
228 intelInitPixelFuncs(functions);
229 intelInitBufferObjectFuncs(functions);
230 intel_init_syncobj_functions(functions);
231 brw_init_object_purgeable_functions(functions);
232
233 brwInitFragProgFuncs( functions );
234 brw_init_common_queryobj_functions(functions);
235 if (screen->devinfo->gen >= 6)
236 gen6_init_queryobj_functions(functions);
237 else
238 gen4_init_queryobj_functions(functions);
239
240 functions->QuerySamplesForFormat = brw_query_samples_for_format;
241
242 if (screen->devinfo->gen >= 7) {
243 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
244 functions->EndTransformFeedback = gen7_end_transform_feedback;
245 } else {
246 functions->BeginTransformFeedback = brw_begin_transform_feedback;
247 functions->EndTransformFeedback = brw_end_transform_feedback;
248 }
249
250 if (screen->devinfo->gen >= 6)
251 functions->GetSamplePosition = gen6_get_sample_position;
252 }
253
/* Advertise this hardware's implementation limits through ctx->Const.
 *
 * Values are per-generation hardware limits (or Mesa software caps where
 * those are smaller).  Must run after the gl_context is initialized and
 * before anything derives state from ctx->Const (e.g. _mesa_init_point).
 */
static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.StripTextureBorder = true;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.FragmentProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.FragmentProgram.MaxTextureImageUnits);
   ctx->Const.VertexProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
   ctx->Const.MaxCombinedTextureImageUnits =
      ctx->Const.VertexProgram.MaxTextureImageUnits +
      ctx->Const.FragmentProgram.MaxTextureImageUnits;

   /* Texture size limits, expressed as number-of-mipmap-levels. */
   ctx->Const.MaxTextureLevels = 14; /* 8192 */
   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
      ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
   ctx->Const.Max3DTextureLevels = 9;
   ctx->Const.MaxCubeTextureLevels = 12;

   if (brw->gen >= 7)
      ctx->Const.MaxArrayTextureLayers = 2048;
   else
      ctx->Const.MaxArrayTextureLayers = 512;

   ctx->Const.MaxTextureRectSize = 1 << 12;

   ctx->Const.MaxTextureMaxAnisotropy = 16.0;

   ctx->Const.MaxRenderbufferSize = 8192;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   /* MSAA capabilities: 4x on Sandybridge, 8x on Ivybridge/Haswell,
    * none earlier (pre-gen6 falls through with Mesa's defaults).
    */
   if (brw->gen == 6) {
      ctx->Const.MaxSamples = 4;
      ctx->Const.MaxColorTextureSamples = 4;
      ctx->Const.MaxDepthTextureSamples = 4;
      ctx->Const.MaxIntegerSamples = 4;
   } else if (brw->gen >= 7) {
      ctx->Const.MaxSamples = 8;
      ctx->Const.MaxColorTextureSamples = 8;
      ctx->Const.MaxDepthTextureSamples = 8;
      ctx->Const.MaxIntegerSamples = 8;
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   }

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   ctx->Const.MaxLineWidth = 5.0;
   ctx->Const.MaxLineWidthAA = 5.0;
   ctx->Const.LineWidthGranularity = 0.5;

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   /* ARB_vertex_program limits.  Zeroed "MaxAlu/Tex*" entries mean the
    * corresponding classification is not limited separately.
    */
   ctx->Const.VertexProgram.MaxNativeInstructions = 16 * 1024;
   ctx->Const.VertexProgram.MaxAluInstructions = 0;
   ctx->Const.VertexProgram.MaxTexInstructions = 0;
   ctx->Const.VertexProgram.MaxTexIndirections = 0;
   ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
   ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
   ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
   ctx->Const.VertexProgram.MaxNativeAttribs = 16;
   ctx->Const.VertexProgram.MaxNativeTemps = 256;
   ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
   ctx->Const.VertexProgram.MaxNativeParameters = 1024;
   ctx->Const.VertexProgram.MaxEnvParams =
      MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
	   ctx->Const.VertexProgram.MaxEnvParams);

   /* ARB_fragment_program limits. */
   ctx->Const.FragmentProgram.MaxNativeInstructions = 1024;
   ctx->Const.FragmentProgram.MaxNativeAluInstructions = 1024;
   ctx->Const.FragmentProgram.MaxNativeTexInstructions = 1024;
   ctx->Const.FragmentProgram.MaxNativeTexIndirections = 1024;
   ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
   ctx->Const.FragmentProgram.MaxNativeTemps = 256;
   ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
   ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
   ctx->Const.FragmentProgram.MaxEnvParams =
      MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
           ctx->Const.FragmentProgram.MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    *
    * RangeMin/RangeMax/Precision here are the encoded values reported by
    * glGetShaderPrecisionFormat() for a 32-bit int type.
    */
   ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
   ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
   ctx->Const.FragmentProgram.LowInt.Precision = 0;
   ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.LowInt;
   ctx->Const.FragmentProgram.MediumInt = ctx->Const.FragmentProgram.LowInt;

   /* Gen6 converts quads to polygon in beginning of 3D pipeline,
    * but we're not sure how it's actually done for vertex order,
    * that affect provoking vertex decision. Always use last vertex
    * convention for quad primitive which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.UniformBooleanTrue = 1;
   ctx->Const.UniformBufferOffsetAlignment = 16;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.VertexProgram.MaxOutputComponents = 128;
      ctx->Const.GeometryProgram.MaxInputComponents = 128;
      ctx->Const.GeometryProgram.MaxOutputComponents = 128;
      ctx->Const.FragmentProgram.MaxInputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_TYPES; i++) {
      /* Pre-gen6 flattens control flow, so bound the if-nesting depth. */
      ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
      ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
      ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
      ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
      ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
      ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;

      ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
	 (i == MESA_SHADER_FRAGMENT);
      ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
	 (i == MESA_SHADER_FRAGMENT);
      ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
   }

   ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4 = true;
}
414
415 /**
416 * Process driconf (drirc) options, setting appropriate context flags.
417 *
418 * intelInitExtensions still pokes at optionCache directly, in order to
419 * avoid advertising various extensions. No flags are set, so it makes
420 * sense to continue doing that there.
421 */
422 static void
423 brw_process_driconf_options(struct brw_context *brw)
424 {
425 struct gl_context *ctx = &brw->ctx;
426
427 driOptionCache *options = &brw->optionCache;
428 driParseConfigFiles(options, &brw->intelScreen->optionCache,
429 brw->driContext->driScreenPriv->myNum, "i965");
430
431 int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
432 switch (bo_reuse_mode) {
433 case DRI_CONF_BO_REUSE_DISABLED:
434 break;
435 case DRI_CONF_BO_REUSE_ALL:
436 intel_bufmgr_gem_enable_reuse(brw->bufmgr);
437 break;
438 }
439
440 if (!driQueryOptionb(options, "hiz")) {
441 brw->has_hiz = false;
442 /* On gen6, you can only do separate stencil with HIZ. */
443 if (brw->gen == 6)
444 brw->has_separate_stencil = false;
445 }
446
447 if (driQueryOptionb(options, "always_flush_batch")) {
448 fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
449 brw->always_flush_batch = true;
450 }
451
452 if (driQueryOptionb(options, "always_flush_cache")) {
453 fprintf(stderr, "flushing GPU caches before/after each draw call\n");
454 brw->always_flush_cache = true;
455 }
456
457 if (driQueryOptionb(options, "disable_throttling")) {
458 fprintf(stderr, "disabling flush throttling\n");
459 brw->disable_throttling = true;
460 }
461
462 brw->disable_derivative_optimization =
463 driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
464
465 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
466
467 ctx->Const.ForceGLSLExtensionsWarn =
468 driQueryOptionb(options, "force_glsl_extensions_warn");
469
470 ctx->Const.DisableGLSLLineContinuations =
471 driQueryOptionb(options, "disable_glsl_line_continuations");
472 }
473
/* DRI entry point: allocate and initialize a new i965 rendering context.
 *
 * On failure, sets *dri_ctx_error, tears down any partially-built state
 * via intelDestroyContext(), and returns false.  Ordering below matters:
 * vtbl and driver functions must exist before _mesa_initialize_context,
 * and ctx->Const must be filled in before _mesa_init_point and
 * _mesa_compute_version.
 */
bool
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;
   struct gl_config visual;

   /* brw is the ralloc root for all per-context allocations. */
   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      printf("%s: failed to alloc context\n", __FUNCTION__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   /* Cache device traits from the screen's device-info table so hot
    * paths don't have to chase pointers.
    */
   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brwInitVtbl( brw );

   brwInitDriverFunctions(screen, &functions);

   struct gl_context *ctx = &brw->ctx;

   /* A NULL visual (configless context) is replaced by a zeroed one. */
   if (mesaVis == NULL) {
      memset(&visual, 0, sizeof visual);
      mesaVis = &visual;
   }

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      printf("%s: failed to init mesa context\n", __FUNCTION__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_process_driconf_options(brw);

   if (!intelInitContext( brw, api, major_version, minor_version,
                          mesaVis, driContextPriv,
                          sharedContextPrivate, &functions,
                          dri_ctx_error)) {
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_initialize_context_constants(brw);

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   brw_init_surface_formats(brw);

   /* Initialize swrast, tnl driver tables: */
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   if (tnl)
      tnl->Driver.RunPipeline = _tnl_run_pipeline;

   /* Map core-Mesa "new state" notifications onto brw dirty bits. */
   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;

   /* G4x and later use different command opcodes for these packets. */
   if (brw->is_g4x || brw->gen >= 5) {
      brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
   } else {
      brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
   }

   /* Per-generation thread-count and URB sizing limits. */
   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   if (brw->gen == 6)
      brw->urb.gen6_gs_previously_active = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;

   brw_init_state( brw );

   /* Pre-gen6 uploads constants through CURBE; allocate its staging bufs. */
   if (brw->gen < 6) {
      brw->curbe.last_buf = calloc(1, 4096);
      brw->curbe.next_buf = calloc(1, 4096);
   }

   brw->batch.need_workaround_flush = true;

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   brw_fs_alloc_reg_sets(brw);
   brw_vec4_alloc_reg_set(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   return true;
}
640
/* Second-stage context init shared with the classic intel path.
 *
 * Installs the viewport hack for loaders without DRI2 invalidate, sets
 * GTT mapping limits, and brings up the software fallback modules
 * (swrast/tnl/vbo), meta ops, extensions, the batchbuffer, and FBOs.
 * Module creation order matters: swrast must exist before tnl/swsetup.
 */
bool
intelInitContext(struct brw_context *brw,
                 int api,
                 unsigned major_version,
                 unsigned minor_version,
                 const struct gl_config * mesaVis,
                 __DRIcontext * driContextPriv,
                 void *sharedContextPrivate,
                 struct dd_function_table *functions,
                 unsigned *dri_ctx_error)
{
   struct gl_context *ctx = &brw->ctx;

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!driContextPriv->driScreenPriv->dri2.useInvalidate) {
      brw->saved_viewport = functions->Viewport;
      functions->Viewport = intel_viewport;
   }

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   intelInitExtensions(ctx);

   brw_process_intel_debug_variable(brw);

   intel_batchbuffer_init(brw);

   intel_fbo_init(brw);

   return true;
}
713
/* DRI entry point: tear down a context created by brwCreateContext().
 *
 * Also called on partially-constructed contexts from brwCreateContext's
 * error paths.  Teardown order mirrors construction: meta and vtbl
 * resources first, then the fallback modules (tnl/swsetup before vbo,
 * swrast last), then the batchbuffer, option cache, core Mesa state,
 * and finally the ralloc root itself.
 */
void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   assert(brw); /* should never be null */
   if (brw) {
      /* Dump a final BMP in case the application doesn't call SwapBuffers */
      if (INTEL_DEBUG & DEBUG_AUB) {
         intel_batchbuffer_flush(brw);
         aub_dump_bmp(&brw->ctx);
      }

      _mesa_meta_free(&brw->ctx);

      /* Generation-specific teardown via the vtbl installed by brwInitVtbl. */
      brw->vtbl.destroy(brw);

      if (ctx->swrast_context) {
         _swsetup_DestroyContext(&brw->ctx);
         _tnl_DestroyContext(&brw->ctx);
      }
      _vbo_DestroyContext(&brw->ctx);

      if (ctx->swrast_context)
         _swrast_DestroyContext(&brw->ctx);

      intel_batchbuffer_free(brw);

      drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
      brw->first_post_swapbuffers_batch = NULL;

      driDestroyOptionCache(&brw->optionCache);

      /* free the Mesa context */
      _mesa_free_context_data(&brw->ctx);

      /* Frees brw and every ralloc child hanging off it. */
      ralloc_free(brw);
      driContextPriv->driverPrivate = NULL;
   }
}
756
757 GLboolean
758 intelUnbindContext(__DRIcontext * driContextPriv)
759 {
760 /* Unset current context and dispath table */
761 _mesa_make_current(NULL, NULL, NULL);
762
763 return true;
764 }
765
766 /**
767  * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
768 * on window system framebuffers.
769 *
770 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
771 * your renderbuffer can do sRGB encode, and you can flip a switch that does
772 * sRGB encode if the renderbuffer can handle it. You can ask specifically
773 * for a visual where you're guaranteed to be capable, but it turns out that
774 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
775  * incapable ones, because there's no difference between the two in resources
776 * used. Applications thus get built that accidentally rely on the default
777 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
778 * great...
779 *
780 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
781 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
782 * So they removed the enable knob and made it "if the renderbuffer is sRGB
783 * capable, do sRGB encode". Then, for your window system renderbuffers, you
784 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
785 * and get no sRGB encode (assuming that both kinds of visual are available).
786 * Thus our choice to support sRGB by default on our visuals for desktop would
787 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
788 *
789 * Unfortunately, renderbuffer setup happens before a context is created. So
790 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
791 * context (without an sRGB visual, though we don't have sRGB visuals exposed
792 * yet), we go turn that back off before anyone finds out.
793 */
794 static void
795 intel_gles3_srgb_workaround(struct brw_context *brw,
796 struct gl_framebuffer *fb)
797 {
798 struct gl_context *ctx = &brw->ctx;
799
800 if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
801 return;
802
803 /* Some day when we support the sRGB capable bit on visuals available for
804 * GLES, we'll need to respect that and not disable things here.
805 */
806 fb->Visual.sRGBCapable = false;
807 for (int i = 0; i < BUFFER_COUNT; i++) {
808 if (fb->Attachment[i].Renderbuffer &&
809 fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_SARGB8) {
810 fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_ARGB8888;
811 }
812 }
813 }
814
/* DRI entry point: bind (or unbind, when driContextPriv is NULL) this
 * context with the given draw/read drawables on the calling thread.
 *
 * Flushes the previously-current context when switching, applies the
 * GLES sRGB workaround before any miptree allocation, and seeds the
 * DRI2 stamps so the first render revalidates the buffers.
 */
GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      /* Surfaceless binding: substitute incomplete framebuffers. */
      if (driDrawPriv == NULL && driReadPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         readFb = driReadPriv->driverPrivate;
         /* stamp - 1 guarantees a mismatch, forcing buffer revalidation
          * in intel_prepare_render().
          */
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format. We must change
       * the format before the renderbuffer's miptree get's allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      intel_prepare_render(brw);
      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}
865
866 void
867 intel_resolve_for_dri2_flush(struct brw_context *brw,
868 __DRIdrawable *drawable)
869 {
870 if (brw->gen < 6) {
871 /* MSAA and fast color clear are not supported, so don't waste time
872 * checking whether a resolve is needed.
873 */
874 return;
875 }
876
877 struct gl_framebuffer *fb = drawable->driverPrivate;
878 struct intel_renderbuffer *rb;
879
880 /* Usually, only the back buffer will need to be downsampled. However,
881 * the front buffer will also need it if the user has rendered into it.
882 */
883 static const gl_buffer_index buffers[2] = {
884 BUFFER_BACK_LEFT,
885 BUFFER_FRONT_LEFT,
886 };
887
888 for (int i = 0; i < 2; ++i) {
889 rb = intel_get_renderbuffer(fb, buffers[i]);
890 if (rb == NULL || rb->mt == NULL)
891 continue;
892 if (rb->mt->num_samples <= 1)
893 intel_miptree_resolve_color(brw, rb->mt);
894 else
895 intel_miptree_downsample(brw, rb->mt);
896 }
897 }
898
/* Return the renderbuffer's pixel size in bits (format bytes * 8). */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return 8 * _mesa_get_format_bytes(intel_rb_format(rb));
}
904
905 static void
906 intel_query_dri2_buffers(struct brw_context *brw,
907 __DRIdrawable *drawable,
908 __DRIbuffer **buffers,
909 int *count);
910
911 static void
912 intel_process_dri2_buffer(struct brw_context *brw,
913 __DRIdrawable *drawable,
914 __DRIbuffer *buffer,
915 struct intel_renderbuffer *rb,
916 const char *buffer_name);
917
/* Ask the DRI2 loader for the drawable's current buffers and attach
 * them to the framebuffer's renderbuffers.
 *
 * Called from intel_prepare_render() when the drawable's stamp shows it
 * was invalidated (resize, buffer swap).  Bails early on any attachment
 * type we don't handle here (depth/stencil/hiz are driver-allocated).
 */
void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   struct brw_context *brw = context->driverPrivate;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
       switch (buffers[i].attachment) {
       case __DRI_BUFFER_FRONT_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
           region_name = "dri2 front buffer";
           break;

       case __DRI_BUFFER_FAKE_FRONT_LEFT:
           /* Fake front shares the real front's renderbuffer slot. */
           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
           region_name = "dri2 fake front buffer";
           break;

       case __DRI_BUFFER_BACK_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
           region_name = "dri2 back buffer";
           break;

       case __DRI_BUFFER_DEPTH:
       case __DRI_BUFFER_HIZ:
       case __DRI_BUFFER_DEPTH_STENCIL:
       case __DRI_BUFFER_STENCIL:
       case __DRI_BUFFER_ACCUM:
       default:
           fprintf(stderr,
                   "unhandled buffer attach event, attachment type %d\n",
                   buffers[i].attachment);
           return;
       }

       intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

   driUpdateFramebufferSize(&brw->ctx, drawable);
}
975
/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   /* Re-fetch the draw drawable's DRI2 buffers if it was invalidated since
    * the last time we rendered to it (the stamps differ).
    */
   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   /* Likewise for the read drawable. */
   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (brw->is_front_buffer_rendering)
      brw->front_buffer_dirty = true;

   /* Wait for the swapbuffers before the one we just emitted, so we
    * don't get too many swaps outstanding for apps that are GPU-heavy
    * but not CPU-heavy.
    *
    * We're using intelDRI2Flush (called from the loader before
    * swapbuffer) and glFlush (for front buffer rendering) as the
    * indicator that a frame is done and then throttle when we get
    * here as we prepare to render the next frame.  At this point, the
    * round trips for swap/copy and getting new buffers are done and
    * we'll spend less time waiting on the GPU.
    *
    * Unfortunately, we don't have a handle to the batch containing
    * the swap, and getting our hands on that doesn't seem worth it,
    * so we just use the first batch we emitted after the last swap.
    */
   if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
      if (!brw->disable_throttling)
         drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
      drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
      brw->first_post_swapbuffers_batch = NULL;
      brw->need_throttle = false;
   }
}
1030
1031 /**
1032 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1033 *
1034 * To determine which DRI buffers to request, examine the renderbuffers
1035 * attached to the drawable's framebuffer. Then request the buffers with
1036 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1037 *
1038 * This is called from intel_update_renderbuffers().
1039 *
1040 * \param drawable Drawable whose buffers are queried.
1041 * \param buffers [out] List of buffers returned by DRI2 query.
1042 * \param buffer_count [out] Number of buffers returned.
1043 *
1044 * \see intel_update_renderbuffers()
1045 * \see DRI2GetBuffers()
1046 * \see DRI2GetBuffersWithFormat()
1047 */
1048 static void
1049 intel_query_dri2_buffers(struct brw_context *brw,
1050 __DRIdrawable *drawable,
1051 __DRIbuffer **buffers,
1052 int *buffer_count)
1053 {
1054 __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1055 struct gl_framebuffer *fb = drawable->driverPrivate;
1056 int i = 0;
1057 unsigned attachments[8];
1058
1059 struct intel_renderbuffer *front_rb;
1060 struct intel_renderbuffer *back_rb;
1061
1062 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1063 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1064
1065 memset(attachments, 0, sizeof(attachments));
1066 if ((brw->is_front_buffer_rendering ||
1067 brw->is_front_buffer_reading ||
1068 !back_rb) && front_rb) {
1069 /* If a fake front buffer is in use, then querying for
1070 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1071 * the real front buffer to the fake front buffer. So before doing the
1072 * query, we need to make sure all the pending drawing has landed in the
1073 * real front buffer.
1074 */
1075 intel_batchbuffer_flush(brw);
1076 intel_flush_front(&brw->ctx);
1077
1078 attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1079 attachments[i++] = intel_bits_per_pixel(front_rb);
1080 } else if (front_rb && brw->front_buffer_dirty) {
1081 /* We have pending front buffer rendering, but we aren't querying for a
1082 * front buffer. If the front buffer we have is a fake front buffer,
1083 * the X server is going to throw it away when it processes the query.
1084 * So before doing the query, make sure all the pending drawing has
1085 * landed in the real front buffer.
1086 */
1087 intel_batchbuffer_flush(brw);
1088 intel_flush_front(&brw->ctx);
1089 }
1090
1091 if (back_rb) {
1092 attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1093 attachments[i++] = intel_bits_per_pixel(back_rb);
1094 }
1095
1096 assert(i <= ARRAY_SIZE(attachments));
1097
1098 *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1099 &drawable->w,
1100 &drawable->h,
1101 attachments, i / 2,
1102 buffer_count,
1103 drawable->loaderPrivate);
1104 }
1105
1106 /**
1107 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1108 *
1109 * This is called from intel_update_renderbuffers().
1110 *
1111 * \par Note:
1112 * DRI buffers whose attachment point is DRI2BufferStencil or
1113 * DRI2BufferDepthStencil are handled as special cases.
1114 *
1115 * \param buffer_name is a human readable name, such as "dri2 front buffer",
1116 * that is passed to intel_region_alloc_for_handle().
1117 *
1118 * \see intel_update_renderbuffers()
1119 * \see intel_region_alloc_for_handle()
1120 */
1121 static void
1122 intel_process_dri2_buffer(struct brw_context *brw,
1123 __DRIdrawable *drawable,
1124 __DRIbuffer *buffer,
1125 struct intel_renderbuffer *rb,
1126 const char *buffer_name)
1127 {
1128 struct intel_region *region = NULL;
1129
1130 if (!rb)
1131 return;
1132
1133 unsigned num_samples = rb->Base.Base.NumSamples;
1134
1135 /* We try to avoid closing and reopening the same BO name, because the first
1136 * use of a mapping of the buffer involves a bunch of page faulting which is
1137 * moderately expensive.
1138 */
1139 if (num_samples == 0) {
1140 if (rb->mt &&
1141 rb->mt->region &&
1142 rb->mt->region->name == buffer->name)
1143 return;
1144 } else {
1145 if (rb->mt &&
1146 rb->mt->singlesample_mt &&
1147 rb->mt->singlesample_mt->region &&
1148 rb->mt->singlesample_mt->region->name == buffer->name)
1149 return;
1150 }
1151
1152 if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1153 fprintf(stderr,
1154 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1155 buffer->name, buffer->attachment,
1156 buffer->cpp, buffer->pitch);
1157 }
1158
1159 intel_miptree_release(&rb->mt);
1160 region = intel_region_alloc_for_handle(brw->intelScreen,
1161 buffer->cpp,
1162 drawable->w,
1163 drawable->h,
1164 buffer->pitch,
1165 buffer->name,
1166 buffer_name);
1167 if (!region)
1168 return;
1169
1170 rb->mt = intel_miptree_create_for_dri2_buffer(brw,
1171 buffer->attachment,
1172 intel_rb_format(rb),
1173 num_samples,
1174 region);
1175 intel_region_release(&region);
1176 }