i965/fs: Add support for translating ir_triop_fma into MAD.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/api_exec.h"
34 #include "main/imports.h"
35 #include "main/macros.h"
36 #include "main/points.h"
37 #include "main/simple_list.h"
38 #include "main/version.h"
39 #include "main/vtxfmt.h"
40
41 #include "vbo/vbo_context.h"
42
43 #include "brw_context.h"
44 #include "brw_defines.h"
45 #include "brw_draw.h"
46 #include "brw_state.h"
47
48 #include "intel_fbo.h"
49 #include "intel_mipmap_tree.h"
50 #include "intel_regions.h"
51 #include "intel_tex.h"
52 #include "intel_tex_obj.h"
53
54 #include "tnl/t_pipeline.h"
55 #include "glsl/ralloc.h"
56
57 /***************************************
58 * Mesa's Driver Functions
59 ***************************************/
60
61 static size_t
62 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
63 GLenum internalFormat, int samples[16])
64 {
65 struct brw_context *brw = brw_context(ctx);
66
67 (void) target;
68
69 switch (brw->gen) {
70 case 7:
71 samples[0] = 8;
72 samples[1] = 4;
73 return 2;
74
75 case 6:
76 samples[0] = 4;
77 return 1;
78
79 default:
80 samples[0] = 1;
81 return 1;
82 }
83 }
84
85 static void brwInitDriverFunctions(struct intel_screen *screen,
86 struct dd_function_table *functions)
87 {
88 intelInitDriverFunctions( functions );
89
90 brwInitFragProgFuncs( functions );
91 brw_init_common_queryobj_functions(functions);
92 if (screen->gen >= 6)
93 gen6_init_queryobj_functions(functions);
94 else
95 gen4_init_queryobj_functions(functions);
96
97 functions->QuerySamplesForFormat = brw_query_samples_for_format;
98
99 if (screen->gen >= 7) {
100 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
101 functions->EndTransformFeedback = gen7_end_transform_feedback;
102 } else {
103 functions->BeginTransformFeedback = brw_begin_transform_feedback;
104 functions->EndTransformFeedback = brw_end_transform_feedback;
105 }
106
107 if (screen->gen >= 6)
108 functions->GetSamplePosition = gen6_get_sample_position;
109 }
110
111 static void
112 brw_initialize_context_constants(struct brw_context *brw)
113 {
114 struct gl_context *ctx = &brw->ctx;
115
116 ctx->Const.QueryCounterBits.Timestamp = 36;
117
118 ctx->Const.StripTextureBorder = true;
119
120 ctx->Const.MaxDualSourceDrawBuffers = 1;
121 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
122 ctx->Const.FragmentProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
123 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
124 ctx->Const.MaxTextureUnits =
125 MIN2(ctx->Const.MaxTextureCoordUnits,
126 ctx->Const.FragmentProgram.MaxTextureImageUnits);
127 ctx->Const.VertexProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
128 ctx->Const.MaxCombinedTextureImageUnits =
129 ctx->Const.VertexProgram.MaxTextureImageUnits +
130 ctx->Const.FragmentProgram.MaxTextureImageUnits;
131
132 ctx->Const.MaxTextureLevels = 14; /* 8192 */
133 if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
134 ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
135 ctx->Const.Max3DTextureLevels = 9;
136 ctx->Const.MaxCubeTextureLevels = 12;
137
138 if (brw->gen >= 7)
139 ctx->Const.MaxArrayTextureLayers = 2048;
140 else
141 ctx->Const.MaxArrayTextureLayers = 512;
142
143 ctx->Const.MaxTextureRectSize = 1 << 12;
144
145 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
146
147 ctx->Const.MaxRenderbufferSize = 8192;
148
149 /* Hardware only supports a limited number of transform feedback buffers.
150 * So we need to override the Mesa default (which is based only on software
151 * limits).
152 */
153 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
154
155 /* On Gen6, in the worst case, we use up one binding table entry per
156 * transform feedback component (see comments above the definition of
157 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
158 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
159 * BRW_MAX_SOL_BINDINGS.
160 *
161 * In "separate components" mode, we need to divide this value by
162 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
163 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
164 */
165 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
166 ctx->Const.MaxTransformFeedbackSeparateComponents =
167 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
168
169 if (brw->gen == 6) {
170 ctx->Const.MaxSamples = 4;
171 ctx->Const.MaxColorTextureSamples = 4;
172 ctx->Const.MaxDepthTextureSamples = 4;
173 ctx->Const.MaxIntegerSamples = 4;
174 } else if (brw->gen >= 7) {
175 ctx->Const.MaxSamples = 8;
176 ctx->Const.MaxColorTextureSamples = 8;
177 ctx->Const.MaxDepthTextureSamples = 8;
178 ctx->Const.MaxIntegerSamples = 8;
179 }
180
181 ctx->Const.MinLineWidth = 1.0;
182 ctx->Const.MinLineWidthAA = 1.0;
183 ctx->Const.MaxLineWidth = 5.0;
184 ctx->Const.MaxLineWidthAA = 5.0;
185 ctx->Const.LineWidthGranularity = 0.5;
186
187 ctx->Const.MinPointSize = 1.0;
188 ctx->Const.MinPointSizeAA = 1.0;
189 ctx->Const.MaxPointSize = 255.0;
190 ctx->Const.MaxPointSizeAA = 255.0;
191 ctx->Const.PointSizeGranularity = 1.0;
192
193 if (brw->gen >= 5 || brw->is_g4x)
194 ctx->Const.MaxClipPlanes = 8;
195
196 ctx->Const.VertexProgram.MaxNativeInstructions = 16 * 1024;
197 ctx->Const.VertexProgram.MaxAluInstructions = 0;
198 ctx->Const.VertexProgram.MaxTexInstructions = 0;
199 ctx->Const.VertexProgram.MaxTexIndirections = 0;
200 ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
201 ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
202 ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
203 ctx->Const.VertexProgram.MaxNativeAttribs = 16;
204 ctx->Const.VertexProgram.MaxNativeTemps = 256;
205 ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
206 ctx->Const.VertexProgram.MaxNativeParameters = 1024;
207 ctx->Const.VertexProgram.MaxEnvParams =
208 MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
209 ctx->Const.VertexProgram.MaxEnvParams);
210
211 ctx->Const.FragmentProgram.MaxNativeInstructions = 1024;
212 ctx->Const.FragmentProgram.MaxNativeAluInstructions = 1024;
213 ctx->Const.FragmentProgram.MaxNativeTexInstructions = 1024;
214 ctx->Const.FragmentProgram.MaxNativeTexIndirections = 1024;
215 ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
216 ctx->Const.FragmentProgram.MaxNativeTemps = 256;
217 ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
218 ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
219 ctx->Const.FragmentProgram.MaxEnvParams =
220 MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
221 ctx->Const.FragmentProgram.MaxEnvParams);
222
223 /* Fragment shaders use real, 32-bit twos-complement integers for all
224 * integer types.
225 */
226 ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
227 ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
228 ctx->Const.FragmentProgram.LowInt.Precision = 0;
229 ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.LowInt;
230 ctx->Const.FragmentProgram.MediumInt = ctx->Const.FragmentProgram.LowInt;
231
232 /* Gen6 converts quads to polygon in beginning of 3D pipeline,
233 * but we're not sure how it's actually done for vertex order,
234 * that affect provoking vertex decision. Always use last vertex
235 * convention for quad primitive which works as expected for now.
236 */
237 if (brw->gen >= 6)
238 ctx->Const.QuadsFollowProvokingVertexConvention = false;
239
240 ctx->Const.NativeIntegers = true;
241 ctx->Const.UniformBooleanTrue = 1;
242 ctx->Const.UniformBufferOffsetAlignment = 16;
243
244 ctx->Const.ForceGLSLExtensionsWarn =
245 driQueryOptionb(&brw->optionCache, "force_glsl_extensions_warn");
246
247 ctx->Const.DisableGLSLLineContinuations =
248 driQueryOptionb(&brw->optionCache, "disable_glsl_line_continuations");
249
250 /* We want the GLSL compiler to emit code that uses condition codes */
251 for (int i = 0; i < MESA_SHADER_TYPES; i++) {
252 ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
253 ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
254 ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
255 ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
256 ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
257 ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;
258
259 ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
260 (i == MESA_SHADER_FRAGMENT);
261 ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
262 (i == MESA_SHADER_FRAGMENT);
263 ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
264 }
265
266 ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4 = true;
267 }
268
269 bool
270 brwCreateContext(int api,
271 const struct gl_config *mesaVis,
272 __DRIcontext *driContextPriv,
273 unsigned major_version,
274 unsigned minor_version,
275 uint32_t flags,
276 unsigned *error,
277 void *sharedContextPrivate)
278 {
279 __DRIscreen *sPriv = driContextPriv->driScreenPriv;
280 struct intel_screen *screen = sPriv->driverPrivate;
281 struct dd_function_table functions;
282
283 struct brw_context *brw = rzalloc(NULL, struct brw_context);
284 if (!brw) {
285 printf("%s: failed to alloc context\n", __FUNCTION__);
286 *error = __DRI_CTX_ERROR_NO_MEMORY;
287 return false;
288 }
289
290 /* brwInitVtbl needs to know the chipset generation so that it can set the
291 * right pointers.
292 */
293 brw->gen = screen->gen;
294
295 brwInitVtbl( brw );
296
297 brwInitDriverFunctions(screen, &functions);
298
299 struct gl_context *ctx = &brw->ctx;
300
301 if (!intelInitContext( brw, api, major_version, minor_version,
302 mesaVis, driContextPriv,
303 sharedContextPrivate, &functions,
304 error)) {
305 ralloc_free(brw);
306 return false;
307 }
308
309 brw_initialize_context_constants(brw);
310
311 /* Reinitialize the context point state. It depends on ctx->Const values. */
312 _mesa_init_point(ctx);
313
314 if (brw->gen >= 6) {
315 /* Create a new hardware context. Using a hardware context means that
316 * our GPU state will be saved/restored on context switch, allowing us
317 * to assume that the GPU is in the same state we left it in.
318 *
319 * This is required for transform feedback buffer offsets, query objects,
320 * and also allows us to reduce how much state we have to emit.
321 */
322 brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
323
324 if (!brw->hw_ctx) {
325 fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
326 ralloc_free(brw);
327 return false;
328 }
329 }
330
331 brw_init_surface_formats(brw);
332
333 /* Initialize swrast, tnl driver tables: */
334 TNLcontext *tnl = TNL_CONTEXT(ctx);
335 if (tnl)
336 tnl->Driver.RunPipeline = _tnl_run_pipeline;
337
338 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
339 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
340 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
341
342 if (brw->is_g4x || brw->gen >= 5) {
343 brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
344 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
345 brw->has_surface_tile_offset = true;
346 if (brw->gen < 6)
347 brw->has_compr4 = true;
348 brw->has_aa_line_parameters = true;
349 brw->has_pln = true;
350 } else {
351 brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
352 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
353 }
354
355 /* WM maximum threads is number of EUs times number of threads per EU. */
356 assert(brw->gen <= 7);
357
358 if (brw->is_haswell) {
359 if (brw->gt == 1) {
360 brw->max_wm_threads = 102;
361 brw->max_vs_threads = 70;
362 brw->max_gs_threads = 70;
363 brw->urb.size = 128;
364 brw->urb.max_vs_entries = 640;
365 brw->urb.max_gs_entries = 256;
366 } else if (brw->gt == 2) {
367 brw->max_wm_threads = 204;
368 brw->max_vs_threads = 280;
369 brw->max_gs_threads = 256;
370 brw->urb.size = 256;
371 brw->urb.max_vs_entries = 1664;
372 brw->urb.max_gs_entries = 640;
373 } else if (brw->gt == 3) {
374 brw->max_wm_threads = 408;
375 brw->max_vs_threads = 280;
376 brw->max_gs_threads = 256;
377 brw->urb.size = 512;
378 brw->urb.max_vs_entries = 1664;
379 brw->urb.max_gs_entries = 640;
380 }
381 } else if (brw->gen == 7) {
382 if (brw->gt == 1) {
383 brw->max_wm_threads = 48;
384 brw->max_vs_threads = 36;
385 brw->max_gs_threads = 36;
386 brw->urb.size = 128;
387 brw->urb.max_vs_entries = 512;
388 brw->urb.max_gs_entries = 192;
389 } else if (brw->gt == 2) {
390 brw->max_wm_threads = 172;
391 brw->max_vs_threads = 128;
392 brw->max_gs_threads = 128;
393 brw->urb.size = 256;
394 brw->urb.max_vs_entries = 704;
395 brw->urb.max_gs_entries = 320;
396 } else {
397 assert(!"Unknown gen7 device.");
398 }
399 } else if (brw->gen == 6) {
400 if (brw->gt == 2) {
401 brw->max_wm_threads = 80;
402 brw->max_vs_threads = 60;
403 brw->max_gs_threads = 60;
404 brw->urb.size = 64; /* volume 5c.5 section 5.1 */
405 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
406 brw->urb.max_gs_entries = 256;
407 } else {
408 brw->max_wm_threads = 40;
409 brw->max_vs_threads = 24;
410 brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
411 brw->urb.size = 32; /* volume 5c.5 section 5.1 */
412 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
413 brw->urb.max_gs_entries = 256;
414 }
415 brw->urb.gen6_gs_previously_active = false;
416 } else if (brw->gen == 5) {
417 brw->urb.size = 1024;
418 brw->max_vs_threads = 72;
419 brw->max_gs_threads = 32;
420 brw->max_wm_threads = 12 * 6;
421 } else if (brw->is_g4x) {
422 brw->urb.size = 384;
423 brw->max_vs_threads = 32;
424 brw->max_gs_threads = 2;
425 brw->max_wm_threads = 10 * 5;
426 } else if (brw->gen < 6) {
427 brw->urb.size = 256;
428 brw->max_vs_threads = 16;
429 brw->max_gs_threads = 2;
430 brw->max_wm_threads = 8 * 4;
431 brw->has_negative_rhw_bug = true;
432 }
433
434 if (brw->gen <= 7) {
435 brw->needs_unlit_centroid_workaround = true;
436 }
437
438 brw->prim_restart.in_progress = false;
439 brw->prim_restart.enable_cut_index = false;
440
441 brw_init_state( brw );
442
443 if (brw->gen < 6) {
444 brw->curbe.last_buf = calloc(1, 4096);
445 brw->curbe.next_buf = calloc(1, 4096);
446 }
447
448 brw->state.dirty.mesa = ~0;
449 brw->state.dirty.brw = ~0;
450
451 /* Make sure that brw->state.dirty.brw has enough bits to hold all possible
452 * dirty flags.
453 */
454 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw));
455
456 brw->emit_state_always = 0;
457
458 brw->batch.need_workaround_flush = true;
459
460 ctx->VertexProgram._MaintainTnlProgram = true;
461 ctx->FragmentProgram._MaintainTexEnvProgram = true;
462
463 brw_draw_init( brw );
464
465 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
466
467 ctx->Const.ContextFlags = 0;
468 if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
469 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
470
471 if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
472 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
473
474 /* Turn on some extra GL_ARB_debug_output generation. */
475 brw->perf_debug = true;
476 }
477
478 brw_fs_alloc_reg_sets(brw);
479 brw_vec4_alloc_reg_set(brw);
480
481 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
482 brw_init_shader_time(brw);
483
484 _mesa_compute_version(ctx);
485
486 _mesa_initialize_dispatch_tables(ctx);
487 _mesa_initialize_vbo_vtxfmt(ctx);
488
489 return true;
490 }
491