Merge remote branch 'origin/master' into nv50-compiler
[mesa.git] / src / gallium / drivers / nvfx / nvfx_screen.c
1 #include "pipe/p_screen.h"
2 #include "pipe/p_state.h"
3 #include "util/u_format_s3tc.h"
4 #include "util/u_simple_screen.h"
5
6 #include "nouveau/nouveau_screen.h"
7
8 #include "nvfx_context.h"
9 #include "nvfx_screen.h"
10 #include "nvfx_resource.h"
11
12 #define NV30TCL_CHIPSET_3X_MASK 0x00000003 /* bit n set => chipset 0x3n uses 3D class 0x0397 (bits 0-1) */
13 #define NV34TCL_CHIPSET_3X_MASK 0x00000010 /* bit n set => chipset 0x3n uses 3D class 0x0697 (bit 4) */
14 #define NV35TCL_CHIPSET_3X_MASK 0x000001e0 /* bit n set => chipset 0x3n uses 3D class 0x0497 (bits 5-8) */
15
16 /* FIXME: It seems we should not directly include ../../winsys/drm/nouveau/drm/nouveau_drm_api.h
17 * to get the pointer to the context front buffer, so nouveau_winsys is copied here.
18 * nvfx_screen_surface_format_supported() can then use it to enforce creating FBOs
19 * with the same number of bits everywhere.
20 */
21 struct nouveau_winsys { /* local copy of the winsys layout; see the FIXME above */
22 struct pipe_winsys base; /* must stay first: pscreen->winsys (a pipe_winsys *) is cast to this struct */
23
24 struct pipe_screen *pscreen; /* not read in this file */
25
26 struct pipe_surface *front; /* front buffer; may be NULL; its format is read by the Z16 support check */
27 };
28 #define NV4X_GRCLASS4097_CHIPSETS 0x00000baf /* bit n set => chipset 0x4n uses NV40TCL */
29 #define NV4X_GRCLASS4497_CHIPSETS 0x00005450 /* bit n set => chipset 0x4n uses NV44TCL */
30 #define NV6X_GRCLASS4497_CHIPSETS 0x00000088 /* bit n set => chipset 0x6n uses NV44TCL */
31
32 static int
33 nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
34 {
35 struct nvfx_screen *screen = nvfx_screen(pscreen);
36
37 switch (param) {
38 case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
39 /* TODO: check this */
40 return screen->is_nv4x ? 16 : 8;
41 case PIPE_CAP_NPOT_TEXTURES:
42 return !!screen->is_nv4x;
43 case PIPE_CAP_TWO_SIDED_STENCIL:
44 return 1;
45 case PIPE_CAP_GLSL:
46 return 0;
47 case PIPE_CAP_ANISOTROPIC_FILTER:
48 return 1;
49 case PIPE_CAP_POINT_SPRITE:
50 return 1;
51 case PIPE_CAP_MAX_RENDER_TARGETS:
52 return screen->is_nv4x ? 4 : 2;
53 case PIPE_CAP_OCCLUSION_QUERY:
54 return 1;
55 case PIPE_CAP_TIMER_QUERY:
56 return 0;
57 case PIPE_CAP_TEXTURE_SHADOW_MAP:
58 return 1;
59 case PIPE_CAP_TEXTURE_SWIZZLE:
60 return 1;
61 case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
62 return 13;
63 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
64 return 10;
65 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
66 return 13;
67 case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
68 return !!screen->is_nv4x;
69 case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
70 return 1;
71 case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
72 return 0; /* We have 4 on nv40 - but unsupported currently */
73 case PIPE_CAP_TGSI_CONT_SUPPORTED:
74 return 0;
75 case PIPE_CAP_BLEND_EQUATION_SEPARATE:
76 return !!screen->is_nv4x;
77 case PIPE_CAP_MAX_COMBINED_SAMPLERS:
78 return 16;
79 case PIPE_CAP_INDEP_BLEND_ENABLE:
80 /* TODO: on nv40 we have separate color masks */
81 /* TODO: nv40 mrt blending is probably broken */
82 return 0;
83 case PIPE_CAP_INDEP_BLEND_FUNC:
84 return 0;
85 case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
86 return 0;
87 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
88 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
89 return 1;
90 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
91 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
92 return 0;
93 case PIPE_CAP_MAX_FS_INSTRUCTIONS:
94 case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS:
95 case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS:
96 case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS:
97 return 4096;
98 case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH:
99 /* FIXME: is it the dynamic (nv30:0/nv40:24) or the static
100 value (nv30:0/nv40:4) ? */
101 return screen->is_nv4x ? 4 : 0;
102 case PIPE_CAP_MAX_FS_INPUTS:
103 return 10;
104 case PIPE_CAP_MAX_FS_CONSTS:
105 return screen->is_nv4x ? 224 : 32;
106 case PIPE_CAP_MAX_FS_TEMPS:
107 return 32;
108 case PIPE_CAP_MAX_FS_ADDRS:
109 return screen->is_nv4x ? 1 : 0;
110 case PIPE_CAP_MAX_FS_PREDS:
111 return screen->is_nv4x ? 1 : 0;
112 case PIPE_CAP_MAX_VS_INSTRUCTIONS:
113 case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS:
114 return screen->is_nv4x ? 512 : 256;
115 case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS:
116 case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS:
117 return screen->is_nv4x ? 512 : 0;
118 case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH:
119 /* FIXME: is it the dynamic (nv30:24/nv40:24) or the static
120 value (nv30:1/nv40:4) ? */
121 return screen->is_nv4x ? 4 : 1;
122 case PIPE_CAP_MAX_VS_INPUTS:
123 return 16;
124 case PIPE_CAP_MAX_VS_CONSTS:
125 return 256;
126 case PIPE_CAP_MAX_VS_TEMPS:
127 return screen->is_nv4x ? 32 : 13;
128 case PIPE_CAP_MAX_VS_ADDRS:
129 return 2;
130 case PIPE_CAP_MAX_VS_PREDS:
131 return screen->is_nv4x ? 1 : 0;
132 case PIPE_CAP_GEOMETRY_SHADER4:
133 return 0;
134 case PIPE_CAP_DEPTH_CLAMP:
135 return 0; // TODO: implement depth clamp
136 default:
137 NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
138 return 0;
139 }
140 }
141
142 static float
143 nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
144 {
145 struct nvfx_screen *screen = nvfx_screen(pscreen);
146
147 switch (param) {
148 case PIPE_CAP_MAX_LINE_WIDTH:
149 case PIPE_CAP_MAX_LINE_WIDTH_AA:
150 return 10.0;
151 case PIPE_CAP_MAX_POINT_WIDTH:
152 case PIPE_CAP_MAX_POINT_WIDTH_AA:
153 return 64.0;
154 case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
155 return screen->is_nv4x ? 16.0 : 8.0;
156 case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
157 return screen->is_nv4x ? 16.0 : 4.0;
158 default:
159 NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
160 return 0.0;
161 }
162 }
163
164 static boolean
165 nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
166 enum pipe_format format,
167 enum pipe_texture_target target,
168 unsigned sample_count,
169 unsigned tex_usage, unsigned geom_flags)
170 {
171 struct nvfx_screen *screen = nvfx_screen(pscreen);
172 struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front;
173
174 if (sample_count > 1)
175 return FALSE;
176
177 if (tex_usage & PIPE_BIND_RENDER_TARGET) {
178 switch (format) {
179 case PIPE_FORMAT_B8G8R8A8_UNORM:
180 case PIPE_FORMAT_B8G8R8X8_UNORM:
181 case PIPE_FORMAT_B5G6R5_UNORM:
182 return TRUE;
183 default:
184 break;
185 }
186 } else
187 if (tex_usage & PIPE_BIND_DEPTH_STENCIL) {
188 switch (format) {
189 case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
190 case PIPE_FORMAT_X8Z24_UNORM:
191 return TRUE;
192 case PIPE_FORMAT_Z16_UNORM:
193 /* TODO: this nv30 limitation probably does not exist */
194 if (!screen->is_nv4x && front)
195 return (front->format == PIPE_FORMAT_B5G6R5_UNORM);
196 return TRUE;
197 default:
198 break;
199 }
200 } else {
201 switch (format) {
202 if (tex_usage & PIPE_BIND_SAMPLER_VIEW) {
203 switch (format) {
204 case PIPE_FORMAT_DXT1_RGB:
205 case PIPE_FORMAT_DXT1_RGBA:
206 case PIPE_FORMAT_DXT3_RGBA:
207 case PIPE_FORMAT_DXT5_RGBA:
208 return util_format_s3tc_enabled;
209 default:
210 break;
211 }
212 }
213 case PIPE_FORMAT_B8G8R8A8_UNORM:
214 case PIPE_FORMAT_B8G8R8X8_UNORM:
215 case PIPE_FORMAT_B5G5R5A1_UNORM:
216 case PIPE_FORMAT_B4G4R4A4_UNORM:
217 case PIPE_FORMAT_B5G6R5_UNORM:
218 case PIPE_FORMAT_L8_UNORM:
219 case PIPE_FORMAT_A8_UNORM:
220 case PIPE_FORMAT_I8_UNORM:
221 case PIPE_FORMAT_L8A8_UNORM:
222 case PIPE_FORMAT_Z16_UNORM:
223 case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
224 return TRUE;
225 /* TODO: does nv30 support this? */
226 case PIPE_FORMAT_R16_SNORM:
227 return !!screen->is_nv4x;
228 default:
229 break;
230 }
231 }
232
233 return FALSE;
234 }
235
236
237 static void
238 nvfx_screen_destroy(struct pipe_screen *pscreen)
239 {
240 struct nvfx_screen *screen = nvfx_screen(pscreen);
241
242 nouveau_resource_destroy(&screen->vp_exec_heap);
243 nouveau_resource_destroy(&screen->vp_data_heap);
244 nouveau_resource_destroy(&screen->query_heap);
245 nouveau_notifier_free(&screen->query);
246 nouveau_notifier_free(&screen->sync);
247 nouveau_grobj_free(&screen->eng3d);
248 nv04_surface_2d_takedown(&screen->eng2d);
249
250 nouveau_screen_fini(&screen->base);
251
252 FREE(pscreen);
253 }
254
255 static void nv30_screen_init(struct nvfx_screen *screen)
256 {
257 struct nouveau_channel *chan = screen->base.channel;
258 int i;
259
260 /* TODO: perhaps we should do some of this on nv40 too? */
261 for (i=1; i<8; i++) {
262 OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1));
263 OUT_RING(chan, 0);
264 OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_VERT(i), 1));
265 OUT_RING(chan, 0);
266 }
267
268 OUT_RING(chan, RING_3D(0x220, 1));
269 OUT_RING(chan, 1);
270
271 OUT_RING(chan, RING_3D(0x03b0, 1));
272 OUT_RING(chan, 0x00100000);
273 OUT_RING(chan, RING_3D(0x1454, 1));
274 OUT_RING(chan, 0);
275 OUT_RING(chan, RING_3D(0x1d80, 1));
276 OUT_RING(chan, 3);
277 OUT_RING(chan, RING_3D(0x1450, 1));
278 OUT_RING(chan, 0x00030004);
279
280 /* NEW */
281 OUT_RING(chan, RING_3D(0x1e98, 1));
282 OUT_RING(chan, 0);
283 OUT_RING(chan, RING_3D(0x17e0, 3));
284 OUT_RING(chan, fui(0.0));
285 OUT_RING(chan, fui(0.0));
286 OUT_RING(chan, fui(1.0));
287 OUT_RING(chan, RING_3D(0x1f80, 16));
288 for (i=0; i<16; i++) {
289 OUT_RING(chan, (i==8) ? 0x0000ffff : 0);
290 }
291
292 OUT_RING(chan, RING_3D(0x120, 3));
293 OUT_RING(chan, 0);
294 OUT_RING(chan, 1);
295 OUT_RING(chan, 2);
296
297 OUT_RING(chan, RING_3D(0x1d88, 1));
298 OUT_RING(chan, 0x00001200);
299
300 OUT_RING(chan, RING_3D(NV34TCL_RC_ENABLE, 1));
301 OUT_RING(chan, 0);
302
303 OUT_RING(chan, RING_3D(NV34TCL_DEPTH_RANGE_NEAR, 2));
304 OUT_RING(chan, fui(0.0));
305 OUT_RING(chan, fui(1.0));
306
307 OUT_RING(chan, RING_3D(NV34TCL_MULTISAMPLE_CONTROL, 1));
308 OUT_RING(chan, 0xffff0000);
309
310 /* enables use of vp rather than fixed-function somehow */
311 OUT_RING(chan, RING_3D(0x1e94, 1));
312 OUT_RING(chan, 0x13);
313 }
314
315 static void nv40_screen_init(struct nvfx_screen *screen)
316 {
317 struct nouveau_channel *chan = screen->base.channel;
318
319 OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 2));
320 OUT_RING(chan, screen->base.channel->vram->handle);
321 OUT_RING(chan, screen->base.channel->vram->handle);
322
323 OUT_RING(chan, RING_3D(0x1ea4, 3));
324 OUT_RING(chan, 0x00000010);
325 OUT_RING(chan, 0x01000100);
326 OUT_RING(chan, 0xff800006);
327
328 /* vtxprog output routing */
329 OUT_RING(chan, RING_3D(0x1fc4, 1));
330 OUT_RING(chan, 0x06144321);
331 OUT_RING(chan, RING_3D(0x1fc8, 2));
332 OUT_RING(chan, 0xedcba987);
333 OUT_RING(chan, 0x00000021);
334 OUT_RING(chan, RING_3D(0x1fd0, 1));
335 OUT_RING(chan, 0x00171615);
336 OUT_RING(chan, RING_3D(0x1fd4, 1));
337 OUT_RING(chan, 0x001b1a19);
338
339 OUT_RING(chan, RING_3D(0x1ef8, 1));
340 OUT_RING(chan, 0x0020ffff);
341 OUT_RING(chan, RING_3D(0x1d64, 1));
342 OUT_RING(chan, 0x00d30000);
343 OUT_RING(chan, RING_3D(0x1e94, 1));
344 OUT_RING(chan, 0x00000001);
345 }
346
347 static unsigned
348 nvfx_screen_get_vertex_buffer_flags(struct nvfx_screen* screen)
349 {
350 int vram_hack_default = 0;
351 int vram_hack;
352 // TODO: this is a bit of a guess; also add other cards that may need this hack.
353 // It may also depend on the specific card or the AGP/PCIe chipset.
354 if(screen->base.device->chipset == 0x47 /* G70 */
355 || screen->base.device->chipset == 0x49 /* G71 */
356 || screen->base.device->chipset == 0x46 /* G72 */
357 )
358 vram_hack_default = 1;
359 vram_hack = debug_get_bool_option("NOUVEAU_VTXIDX_IN_VRAM", vram_hack_default);
360
361 #ifdef DEBUG
362 if(!vram_hack)
363 {
364 fprintf(stderr, "Some systems may experience graphics corruption due to randomly misplaced vertices.\n"
365 "If this is happening, export NOUVEAU_VTXIDX_IN_VRAM=1 may reduce or eliminate the problem\n");
366 }
367 else
368 {
369 fprintf(stderr, "A performance reducing hack is being used to help avoid graphics corruption.\n"
370 "You can try export NOUVEAU_VTXIDX_IN_VRAM=0 to disable it.\n");
371 }
372 #endif
373
374 return vram_hack ? NOUVEAU_BO_VRAM : NOUVEAU_BO_GART;
375 }
376
377 struct pipe_screen *
378 nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
379 {
380 static const unsigned query_sizes[] = {(4096 - 4 * 32) / 32, 3 * 1024 / 32, 2 * 1024 / 32, 1024 / 32};
381 struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen);
382 struct nouveau_channel *chan;
383 struct pipe_screen *pscreen;
384 unsigned eng3d_class = 0;
385 int ret, i;
386
387 if (!screen)
388 return NULL;
389
390 pscreen = &screen->base.base;
391
392 ret = nouveau_screen_init(&screen->base, dev);
393 if (ret) {
394 nvfx_screen_destroy(pscreen);
395 return NULL;
396 }
397 chan = screen->base.channel;
398
399 pscreen->winsys = ws;
400 pscreen->destroy = nvfx_screen_destroy;
401 pscreen->get_param = nvfx_screen_get_param;
402 pscreen->get_paramf = nvfx_screen_get_paramf;
403 pscreen->is_format_supported = nvfx_screen_surface_format_supported;
404 pscreen->context_create = nvfx_create;
405
406 switch (dev->chipset & 0xf0) {
407 case 0x30:
408 if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
409 eng3d_class = 0x0397;
410 else if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
411 eng3d_class = 0x0697;
412 else if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
413 eng3d_class = 0x0497;
414 break;
415 case 0x40:
416 if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f)))
417 eng3d_class = NV40TCL;
418 else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
419 eng3d_class = NV44TCL;
420 screen->is_nv4x = ~0;
421 break;
422 case 0x60:
423 if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
424 eng3d_class = NV44TCL;
425 screen->is_nv4x = ~0;
426 break;
427 }
428
429 if (!eng3d_class) {
430 NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev->chipset);
431 return NULL;
432 }
433
434 screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE);
435
436 screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen);
437
438 /* surely both nv3x and nv44 support index buffers too: find out how and test that */
439 if(eng3d_class == NV40TCL)
440 screen->index_buffer_reloc_flags = screen->vertex_buffer_reloc_flags;
441
442 if(!screen->force_swtnl && screen->vertex_buffer_reloc_flags == screen->index_buffer_reloc_flags)
443 screen->base.vertex_buffer_flags = screen->base.index_buffer_flags = screen->vertex_buffer_reloc_flags;
444
445 nvfx_screen_init_resource_functions(pscreen);
446
447 ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d);
448 if (ret) {
449 NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
450 return FALSE;
451 }
452
453 /* 2D engine setup */
454 screen->eng2d = nv04_surface_2d_init(&screen->base);
455 screen->eng2d->buf = nvfx_surface_buffer;
456
457 /* Notifier for sync purposes */
458 ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
459 if (ret) {
460 NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
461 nvfx_screen_destroy(pscreen);
462 return NULL;
463 }
464
465 /* Query objects */
466 for(i = 0; i < sizeof(query_sizes) / sizeof(query_sizes[0]); ++i)
467 {
468 ret = nouveau_notifier_alloc(chan, 0xbeef0302, query_sizes[i], &screen->query);
469 if(!ret)
470 break;
471 }
472
473 if (ret) {
474 NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
475 nvfx_screen_destroy(pscreen);
476 return NULL;
477 }
478
479 ret = nouveau_resource_init(&screen->query_heap, 0, query_sizes[i]);
480 if (ret) {
481 NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
482 nvfx_screen_destroy(pscreen);
483 return NULL;
484 }
485
486 LIST_INITHEAD(&screen->query_list);
487
488 /* Vtxprog resources */
489 if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) ||
490 nouveau_resource_init(&screen->vp_data_heap, 0, 256)) {
491 nvfx_screen_destroy(pscreen);
492 return NULL;
493 }
494
495 BIND_RING(chan, screen->eng3d, 7);
496
497 /* Static eng3d initialisation */
498 /* note that we just started using the channel, so we must have space in the pushbuffer */
499 OUT_RING(chan, RING_3D(NV34TCL_DMA_NOTIFY, 1));
500 OUT_RING(chan, screen->sync->handle);
501 OUT_RING(chan, RING_3D(NV34TCL_DMA_TEXTURE0, 2));
502 OUT_RING(chan, chan->vram->handle);
503 OUT_RING(chan, chan->gart->handle);
504 OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1));
505 OUT_RING(chan, chan->vram->handle);
506 OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 2));
507 OUT_RING(chan, chan->vram->handle);
508 OUT_RING(chan, chan->vram->handle);
509 OUT_RING(chan, RING_3D(NV34TCL_DMA_VTXBUF0, 2));
510 OUT_RING(chan, chan->vram->handle);
511 OUT_RING(chan, chan->gart->handle);
512
513 OUT_RING(chan, RING_3D(NV34TCL_DMA_FENCE, 2));
514 OUT_RING(chan, 0);
515 OUT_RING(chan, screen->query->handle);
516
517 OUT_RING(chan, RING_3D(NV34TCL_DMA_IN_MEMORY7, 2));
518 OUT_RING(chan, chan->vram->handle);
519 OUT_RING(chan, chan->vram->handle);
520
521 if(!screen->is_nv4x)
522 nv30_screen_init(screen);
523 else
524 nv40_screen_init(screen);
525
526 return pscreen;
527 }