/*
 * nvfx: support indirect addressing in vps
 * mesa.git: src/gallium/drivers/nvfx/nvfx_screen.c
 */
1 #include "pipe/p_screen.h"
2 #include "pipe/p_state.h"
3 #include "util/u_format_s3tc.h"
4 #include "util/u_simple_screen.h"
5
6 #include "nouveau/nouveau_screen.h"
7
8 #include "nvfx_context.h"
9 #include "nvfx_screen.h"
10 #include "nvfx_resource.h"
11 #include "nvfx_tex.h"
12
/*
 * Chipset bitmasks.  Bit N of a mask is set when the chipset whose low
 * nibble (chipset & 0x0f) equals N belongs to that group; they are tested
 * with (1 << (dev->chipset & 0x0f)) when the 3D object class is selected.
 */

/* 0x3x chipsets, one mask per 3D object class. */
#define NV30TCL_CHIPSET_3X_MASK 0x00000003
#define NV34TCL_CHIPSET_3X_MASK 0x00000010
#define NV35TCL_CHIPSET_3X_MASK 0x000001e0

/* 0x4x and 0x6x chipsets, split by 3D object class. */
#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf
#define NV4X_GRCLASS4497_CHIPSETS 0x00005450
#define NV6X_GRCLASS4497_CHIPSETS 0x00000088
20
21 static int
22 nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
23 {
24 struct nvfx_screen *screen = nvfx_screen(pscreen);
25
26 switch (param) {
27 case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
28 return 16;
29 case PIPE_CAP_NPOT_TEXTURES:
30 return !!screen->is_nv4x;
31 case PIPE_CAP_TWO_SIDED_STENCIL:
32 return 1;
33 case PIPE_CAP_GLSL:
34 return 1;
35 case PIPE_CAP_ANISOTROPIC_FILTER:
36 return 1;
37 case PIPE_CAP_POINT_SPRITE:
38 return 1;
39 case PIPE_CAP_MAX_RENDER_TARGETS:
40 return screen->is_nv4x ? 4 : 2;
41 case PIPE_CAP_OCCLUSION_QUERY:
42 return 1;
43 case PIPE_CAP_TIMER_QUERY:
44 return 0;
45 case PIPE_CAP_TEXTURE_SHADOW_MAP:
46 return 1;
47 case PIPE_CAP_TEXTURE_SWIZZLE:
48 return 1;
49 case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
50 return 13;
51 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
52 return 10;
53 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
54 return 13;
55 case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
56 return !!screen->is_nv4x;
57 case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
58 return 1;
59 case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
60 return 0; /* We have 4 on nv40 - but unsupported currently */
61 case PIPE_CAP_TGSI_CONT_SUPPORTED:
62 return 0;
63 case PIPE_CAP_BLEND_EQUATION_SEPARATE:
64 return !!screen->is_nv4x;
65 case PIPE_CAP_MAX_COMBINED_SAMPLERS:
66 return 16;
67 case PIPE_CAP_INDEP_BLEND_ENABLE:
68 /* TODO: on nv40 we have separate color masks */
69 /* TODO: nv40 mrt blending is probably broken */
70 return 0;
71 case PIPE_CAP_INDEP_BLEND_FUNC:
72 return 0;
73 case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
74 return 0;
75 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
76 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
77 return 1;
78 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
79 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
80 return 0;
81 case PIPE_CAP_MAX_FS_INSTRUCTIONS:
82 case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS:
83 case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS:
84 case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS:
85 return 4096;
86 case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH:
87 /* FIXME: is it the dynamic (nv30:0/nv40:24) or the static
88 value (nv30:0/nv40:4) ? */
89 return screen->is_nv4x ? 4 : 0;
90 case PIPE_CAP_MAX_FS_INPUTS:
91 return 10;
92 case PIPE_CAP_MAX_FS_CONSTS:
93 return screen->is_nv4x ? 224 : 32;
94 case PIPE_CAP_MAX_FS_TEMPS:
95 return 32;
96 case PIPE_CAP_MAX_FS_ADDRS:
97 return screen->is_nv4x ? 1 : 0;
98 case PIPE_CAP_MAX_FS_PREDS:
99 return screen->is_nv4x ? 1 : 0;
100 case PIPE_CAP_MAX_VS_INSTRUCTIONS:
101 case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS:
102 return screen->is_nv4x ? 512 : 256;
103 case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS:
104 case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS:
105 return screen->is_nv4x ? 512 : 0;
106 case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH:
107 /* FIXME: is it the dynamic (nv30:24/nv40:24) or the static
108 value (nv30:1/nv40:4) ? */
109 return screen->is_nv4x ? 4 : 1;
110 case PIPE_CAP_MAX_VS_INPUTS:
111 return 16;
112 case PIPE_CAP_MAX_VS_CONSTS:
113 /* XXX: currently more don't work, but it should be possible to make it work */
114 return 212 - 6;
115 case PIPE_CAP_MAX_VS_TEMPS:
116 return screen->is_nv4x ? 32 : 13;
117 case PIPE_CAP_MAX_VS_ADDRS:
118 return 2;
119 case PIPE_CAP_MAX_VS_PREDS:
120 return screen->is_nv4x ? 1 : 0;
121 case PIPE_CAP_GEOMETRY_SHADER4:
122 return 0;
123 case PIPE_CAP_DEPTH_CLAMP:
124 return 0; // TODO: implement depth clamp
125 default:
126 NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
127 return 0;
128 }
129 }
130
131 static float
132 nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
133 {
134 struct nvfx_screen *screen = nvfx_screen(pscreen);
135
136 switch (param) {
137 case PIPE_CAP_MAX_LINE_WIDTH:
138 case PIPE_CAP_MAX_LINE_WIDTH_AA:
139 return 10.0;
140 case PIPE_CAP_MAX_POINT_WIDTH:
141 case PIPE_CAP_MAX_POINT_WIDTH_AA:
142 return 64.0;
143 case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
144 return screen->is_nv4x ? 16.0 : 8.0;
145 case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
146 return screen->is_nv4x ? 16.0 : 4.0;
147 default:
148 NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
149 return 0.0;
150 }
151 }
152
153 static boolean
154 nvfx_screen_is_format_supported(struct pipe_screen *pscreen,
155 enum pipe_format format,
156 enum pipe_texture_target target,
157 unsigned sample_count,
158 unsigned bind, unsigned geom_flags)
159 {
160 struct nvfx_screen *screen = nvfx_screen(pscreen);
161
162 if (sample_count > 1)
163 return FALSE;
164
165 if (bind & PIPE_BIND_RENDER_TARGET) {
166 switch (format) {
167 case PIPE_FORMAT_B8G8R8A8_UNORM:
168 case PIPE_FORMAT_B8G8R8X8_UNORM:
169 case PIPE_FORMAT_B5G6R5_UNORM:
170 break;
171 default:
172 return FALSE;
173 }
174 }
175
176 if (bind & PIPE_BIND_DEPTH_STENCIL) {
177 switch (format) {
178 case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
179 case PIPE_FORMAT_X8Z24_UNORM:
180 case PIPE_FORMAT_Z16_UNORM:
181 break;
182 default:
183 return FALSE;
184 }
185 }
186
187 if (bind & PIPE_BIND_SAMPLER_VIEW) {
188 struct nvfx_texture_format* tf = &nvfx_texture_formats[format];
189 if(util_format_is_s3tc(format) && !util_format_s3tc_enabled)
190 return FALSE;
191
192 if(screen->is_nv4x)
193 {
194 if(tf->fmt[4] < 0)
195 return FALSE;
196 }
197 else
198 {
199 if(tf->fmt[0] < 0)
200 return FALSE;
201 }
202 }
203
204 // note that we do actually support everything through translate
205 if (bind & PIPE_BIND_VERTEX_BUFFER) {
206 unsigned type = nvfx_vertex_formats[format];
207 if(!type)
208 return FALSE;
209 }
210
211 if (bind & PIPE_BIND_INDEX_BUFFER) {
212 // 8-bit indices supported, but not in hardware index buffer
213 if(format != PIPE_FORMAT_R16_USCALED && format != PIPE_FORMAT_R32_USCALED)
214 return FALSE;
215 }
216
217 if(bind & PIPE_BIND_STREAM_OUTPUT)
218 return FALSE;
219
220 return TRUE;
221 }
222
223 static void
224 nvfx_screen_destroy(struct pipe_screen *pscreen)
225 {
226 struct nvfx_screen *screen = nvfx_screen(pscreen);
227
228 nouveau_resource_destroy(&screen->vp_exec_heap);
229 nouveau_resource_destroy(&screen->vp_data_heap);
230 nouveau_resource_destroy(&screen->query_heap);
231 nouveau_notifier_free(&screen->query);
232 nouveau_notifier_free(&screen->sync);
233 nouveau_grobj_free(&screen->eng3d);
234 nvfx_screen_surface_takedown(pscreen);
235
236 nouveau_screen_fini(&screen->base);
237
238 FREE(pscreen);
239 }
240
241 static void nv30_screen_init(struct nvfx_screen *screen)
242 {
243 struct nouveau_channel *chan = screen->base.channel;
244 int i;
245
246 /* TODO: perhaps we should do some of this on nv40 too? */
247 for (i=1; i<8; i++) {
248 OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1));
249 OUT_RING(chan, 0);
250 OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_VERT(i), 1));
251 OUT_RING(chan, 0);
252 }
253
254 OUT_RING(chan, RING_3D(0x220, 1));
255 OUT_RING(chan, 1);
256
257 OUT_RING(chan, RING_3D(0x03b0, 1));
258 OUT_RING(chan, 0x00100000);
259 OUT_RING(chan, RING_3D(0x1454, 1));
260 OUT_RING(chan, 0);
261 OUT_RING(chan, RING_3D(0x1d80, 1));
262 OUT_RING(chan, 3);
263 OUT_RING(chan, RING_3D(0x1450, 1));
264 OUT_RING(chan, 0x00030004);
265
266 /* NEW */
267 OUT_RING(chan, RING_3D(0x1e98, 1));
268 OUT_RING(chan, 0);
269 OUT_RING(chan, RING_3D(0x17e0, 3));
270 OUT_RING(chan, fui(0.0));
271 OUT_RING(chan, fui(0.0));
272 OUT_RING(chan, fui(1.0));
273 OUT_RING(chan, RING_3D(0x1f80, 16));
274 for (i=0; i<16; i++) {
275 OUT_RING(chan, (i==8) ? 0x0000ffff : 0);
276 }
277
278 OUT_RING(chan, RING_3D(0x120, 3));
279 OUT_RING(chan, 0);
280 OUT_RING(chan, 1);
281 OUT_RING(chan, 2);
282
283 OUT_RING(chan, RING_3D(0x1d88, 1));
284 OUT_RING(chan, 0x00001200);
285
286 OUT_RING(chan, RING_3D(NV34TCL_RC_ENABLE, 1));
287 OUT_RING(chan, 0);
288
289 OUT_RING(chan, RING_3D(NV34TCL_DEPTH_RANGE_NEAR, 2));
290 OUT_RING(chan, fui(0.0));
291 OUT_RING(chan, fui(1.0));
292
293 OUT_RING(chan, RING_3D(NV34TCL_MULTISAMPLE_CONTROL, 1));
294 OUT_RING(chan, 0xffff0000);
295
296 /* enables use of vp rather than fixed-function somehow */
297 OUT_RING(chan, RING_3D(0x1e94, 1));
298 OUT_RING(chan, 0x13);
299 }
300
301 static void nv40_screen_init(struct nvfx_screen *screen)
302 {
303 struct nouveau_channel *chan = screen->base.channel;
304
305 OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 2));
306 OUT_RING(chan, screen->base.channel->vram->handle);
307 OUT_RING(chan, screen->base.channel->vram->handle);
308
309 OUT_RING(chan, RING_3D(0x1ea4, 3));
310 OUT_RING(chan, 0x00000010);
311 OUT_RING(chan, 0x01000100);
312 OUT_RING(chan, 0xff800006);
313
314 /* vtxprog output routing */
315 OUT_RING(chan, RING_3D(0x1fc4, 1));
316 OUT_RING(chan, 0x06144321);
317 OUT_RING(chan, RING_3D(0x1fc8, 2));
318 OUT_RING(chan, 0xedcba987);
319 OUT_RING(chan, 0x00000021);
320 OUT_RING(chan, RING_3D(0x1fd0, 1));
321 OUT_RING(chan, 0x00171615);
322 OUT_RING(chan, RING_3D(0x1fd4, 1));
323 OUT_RING(chan, 0x001b1a19);
324
325 OUT_RING(chan, RING_3D(0x1ef8, 1));
326 OUT_RING(chan, 0x0020ffff);
327 OUT_RING(chan, RING_3D(0x1d64, 1));
328 OUT_RING(chan, 0x00d30000);
329 OUT_RING(chan, RING_3D(0x1e94, 1));
330 OUT_RING(chan, 0x00000001);
331 }
332
333 static unsigned
334 nvfx_screen_get_vertex_buffer_flags(struct nvfx_screen* screen)
335 {
336 int vram_hack_default = 0;
337 int vram_hack;
338 // TODO: this is a bit of a guess; also add other cards that may need this hack.
339 // It may also depend on the specific card or the AGP/PCIe chipset.
340 if(screen->base.device->chipset == 0x47 /* G70 */
341 || screen->base.device->chipset == 0x49 /* G71 */
342 || screen->base.device->chipset == 0x46 /* G72 */
343 )
344 vram_hack_default = 1;
345 vram_hack = debug_get_bool_option("NOUVEAU_VTXIDX_IN_VRAM", vram_hack_default);
346
347 #ifdef DEBUG
348 if(!vram_hack)
349 {
350 fprintf(stderr, "Some systems may experience graphics corruption due to randomly misplaced vertices.\n"
351 "If this is happening, export NOUVEAU_VTXIDX_IN_VRAM=1 may reduce or eliminate the problem\n");
352 }
353 else
354 {
355 fprintf(stderr, "A performance reducing hack is being used to help avoid graphics corruption.\n"
356 "You can try export NOUVEAU_VTXIDX_IN_VRAM=0 to disable it.\n");
357 }
358 #endif
359
360 return vram_hack ? NOUVEAU_BO_VRAM : NOUVEAU_BO_GART;
361 }
362
363 static void nvfx_channel_flush_notify(struct nouveau_channel* chan)
364 {
365 struct nvfx_screen* screen = chan->user_private;
366 struct nvfx_context* nvfx = screen->cur_ctx;
367 if(nvfx)
368 nvfx->relocs_needed = NVFX_RELOCATE_ALL;
369 }
370
371 struct pipe_screen *
372 nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
373 {
374 static const unsigned query_sizes[] = {(4096 - 4 * 32) / 32, 3 * 1024 / 32, 2 * 1024 / 32, 1024 / 32};
375 struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen);
376 struct nouveau_channel *chan;
377 struct pipe_screen *pscreen;
378 unsigned eng3d_class = 0;
379 int ret, i;
380
381 if (!screen)
382 return NULL;
383
384 pscreen = &screen->base.base;
385
386 ret = nouveau_screen_init(&screen->base, dev);
387 if (ret) {
388 nvfx_screen_destroy(pscreen);
389 return NULL;
390 }
391 chan = screen->base.channel;
392 screen->cur_ctx = NULL;
393 chan->user_private = screen;
394 chan->flush_notify = nvfx_channel_flush_notify;
395
396 pscreen->winsys = ws;
397 pscreen->destroy = nvfx_screen_destroy;
398 pscreen->get_param = nvfx_screen_get_param;
399 pscreen->get_paramf = nvfx_screen_get_paramf;
400 pscreen->is_format_supported = nvfx_screen_is_format_supported;
401 pscreen->context_create = nvfx_create;
402
403 switch (dev->chipset & 0xf0) {
404 case 0x30:
405 if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
406 eng3d_class = 0x0397;
407 else if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
408 eng3d_class = 0x0697;
409 else if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
410 eng3d_class = 0x0497;
411 break;
412 case 0x40:
413 if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f)))
414 eng3d_class = NV40TCL;
415 else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
416 eng3d_class = NV44TCL;
417 screen->is_nv4x = ~0;
418 break;
419 case 0x60:
420 if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
421 eng3d_class = NV44TCL;
422 screen->is_nv4x = ~0;
423 break;
424 }
425
426 if (!eng3d_class) {
427 NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev->chipset);
428 return NULL;
429 }
430
431 screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE);
432 screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE);
433
434 screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384);
435 screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0"));
436 screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0"));
437
438 screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen);
439
440 /* surely both nv3x and nv44 support index buffers too: find out how and test that */
441 if(eng3d_class == NV40TCL)
442 screen->index_buffer_reloc_flags = screen->vertex_buffer_reloc_flags;
443
444 if(!screen->force_swtnl && screen->vertex_buffer_reloc_flags == screen->index_buffer_reloc_flags)
445 screen->base.vertex_buffer_flags = screen->base.index_buffer_flags = screen->vertex_buffer_reloc_flags;
446
447 nvfx_screen_init_resource_functions(pscreen);
448
449 ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d);
450 if (ret) {
451 NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
452 return FALSE;
453 }
454
455 /* 2D engine setup */
456 nvfx_screen_surface_init(pscreen);
457
458 /* Notifier for sync purposes */
459 ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
460 if (ret) {
461 NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
462 nvfx_screen_destroy(pscreen);
463 return NULL;
464 }
465
466 /* Query objects */
467 for(i = 0; i < sizeof(query_sizes) / sizeof(query_sizes[0]); ++i)
468 {
469 ret = nouveau_notifier_alloc(chan, 0xbeef0302, query_sizes[i], &screen->query);
470 if(!ret)
471 break;
472 }
473
474 if (ret) {
475 NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
476 nvfx_screen_destroy(pscreen);
477 return NULL;
478 }
479
480 ret = nouveau_resource_init(&screen->query_heap, 0, query_sizes[i]);
481 if (ret) {
482 NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
483 nvfx_screen_destroy(pscreen);
484 return NULL;
485 }
486
487 LIST_INITHEAD(&screen->query_list);
488
489 /* Vtxprog resources */
490 if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) ||
491 /* XXX: this should actually be 468 or 256, but apparently indirect addressing
492 * cannot read consts starting from 212 on nv40.
493 * It looks like 44 slots are reserved for something, and there is a "mode switch"
494 * from 256 slots to 512 slots that we are setting to "256 mode" on nv40, leading
495 * to 212 = 256 - 44 instead of 468 = 512 - 44 usable slots.
496 */
497 nouveau_resource_init(&screen->vp_data_heap, 0, 212)) {
498 nvfx_screen_destroy(pscreen);
499 return NULL;
500 }
501
502 BIND_RING(chan, screen->eng3d, 7);
503
504 /* Static eng3d initialisation */
505 /* note that we just started using the channel, so we must have space in the pushbuffer */
506 OUT_RING(chan, RING_3D(NV34TCL_DMA_NOTIFY, 1));
507 OUT_RING(chan, screen->sync->handle);
508 OUT_RING(chan, RING_3D(NV34TCL_DMA_TEXTURE0, 2));
509 OUT_RING(chan, chan->vram->handle);
510 OUT_RING(chan, chan->gart->handle);
511 OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1));
512 OUT_RING(chan, chan->vram->handle);
513 OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 2));
514 OUT_RING(chan, chan->vram->handle);
515 OUT_RING(chan, chan->vram->handle);
516 OUT_RING(chan, RING_3D(NV34TCL_DMA_VTXBUF0, 2));
517 OUT_RING(chan, chan->vram->handle);
518 OUT_RING(chan, chan->gart->handle);
519
520 OUT_RING(chan, RING_3D(NV34TCL_DMA_FENCE, 2));
521 OUT_RING(chan, 0);
522 OUT_RING(chan, screen->query->handle);
523
524 OUT_RING(chan, RING_3D(NV34TCL_DMA_IN_MEMORY7, 2));
525 OUT_RING(chan, chan->vram->handle);
526 OUT_RING(chan, chan->vram->handle);
527
528 if(!screen->is_nv4x)
529 nv30_screen_init(screen);
530 else
531 nv40_screen_init(screen);
532
533 return pscreen;
534 }