nv50: activate more lanes in a warp
[mesa.git] / src / gallium / drivers / nv50 / nv50_screen.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "pipe/p_screen.h"
24
25 #include "nv50_context.h"
26 #include "nv50_screen.h"
27
28 #include "nouveau/nouveau_stateobj.h"
29
30 static boolean
31 nv50_screen_is_format_supported(struct pipe_screen *pscreen,
32 enum pipe_format format,
33 enum pipe_texture_target target,
34 unsigned tex_usage, unsigned geom_flags)
35 {
36 if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
37 switch (format) {
38 case PIPE_FORMAT_X8R8G8B8_UNORM:
39 case PIPE_FORMAT_A8R8G8B8_UNORM:
40 case PIPE_FORMAT_R5G6B5_UNORM:
41 case PIPE_FORMAT_R16G16B16A16_SNORM:
42 case PIPE_FORMAT_R16G16B16A16_UNORM:
43 case PIPE_FORMAT_R32G32B32A32_FLOAT:
44 case PIPE_FORMAT_R16G16_SNORM:
45 case PIPE_FORMAT_R16G16_UNORM:
46 return TRUE;
47 default:
48 break;
49 }
50 } else
51 if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
52 switch (format) {
53 case PIPE_FORMAT_Z32_FLOAT:
54 case PIPE_FORMAT_Z24S8_UNORM:
55 case PIPE_FORMAT_X8Z24_UNORM:
56 case PIPE_FORMAT_S8Z24_UNORM:
57 return TRUE;
58 default:
59 break;
60 }
61 } else {
62 switch (format) {
63 case PIPE_FORMAT_A8R8G8B8_UNORM:
64 case PIPE_FORMAT_X8R8G8B8_UNORM:
65 case PIPE_FORMAT_A8R8G8B8_SRGB:
66 case PIPE_FORMAT_X8R8G8B8_SRGB:
67 case PIPE_FORMAT_A1R5G5B5_UNORM:
68 case PIPE_FORMAT_A4R4G4B4_UNORM:
69 case PIPE_FORMAT_R5G6B5_UNORM:
70 case PIPE_FORMAT_L8_UNORM:
71 case PIPE_FORMAT_A8_UNORM:
72 case PIPE_FORMAT_I8_UNORM:
73 case PIPE_FORMAT_A8L8_UNORM:
74 case PIPE_FORMAT_DXT1_RGB:
75 case PIPE_FORMAT_DXT1_RGBA:
76 case PIPE_FORMAT_DXT3_RGBA:
77 case PIPE_FORMAT_DXT5_RGBA:
78 case PIPE_FORMAT_Z24S8_UNORM:
79 case PIPE_FORMAT_Z32_FLOAT:
80 case PIPE_FORMAT_R16G16B16A16_SNORM:
81 case PIPE_FORMAT_R16G16B16A16_UNORM:
82 case PIPE_FORMAT_R32G32B32A32_FLOAT:
83 case PIPE_FORMAT_R16G16_SNORM:
84 case PIPE_FORMAT_R16G16_UNORM:
85 return TRUE;
86 default:
87 break;
88 }
89 }
90
91 return FALSE;
92 }
93
94 static int
95 nv50_screen_get_param(struct pipe_screen *pscreen, int param)
96 {
97 switch (param) {
98 case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
99 return 32;
100 case PIPE_CAP_NPOT_TEXTURES:
101 return 1;
102 case PIPE_CAP_TWO_SIDED_STENCIL:
103 return 1;
104 case PIPE_CAP_GLSL:
105 return 0;
106 case PIPE_CAP_ANISOTROPIC_FILTER:
107 return 1;
108 case PIPE_CAP_POINT_SPRITE:
109 return 1;
110 case PIPE_CAP_MAX_RENDER_TARGETS:
111 return 8;
112 case PIPE_CAP_OCCLUSION_QUERY:
113 return 1;
114 case PIPE_CAP_TEXTURE_SHADOW_MAP:
115 return 1;
116 case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
117 return 13;
118 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
119 return 10;
120 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
121 return 13;
122 case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
123 case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
124 return 1;
125 case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
126 return 0;
127 case PIPE_CAP_TGSI_CONT_SUPPORTED:
128 return 0;
129 case PIPE_CAP_BLEND_EQUATION_SEPARATE:
130 return 1;
131 case NOUVEAU_CAP_HW_VTXBUF:
132 return 1;
133 case NOUVEAU_CAP_HW_IDXBUF:
134 return 0;
135 default:
136 NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
137 return 0;
138 }
139 }
140
141 static float
142 nv50_screen_get_paramf(struct pipe_screen *pscreen, int param)
143 {
144 switch (param) {
145 case PIPE_CAP_MAX_LINE_WIDTH:
146 case PIPE_CAP_MAX_LINE_WIDTH_AA:
147 return 10.0;
148 case PIPE_CAP_MAX_POINT_WIDTH:
149 case PIPE_CAP_MAX_POINT_WIDTH_AA:
150 return 64.0;
151 case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
152 return 16.0;
153 case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
154 return 4.0;
155 default:
156 NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
157 return 0.0;
158 }
159 }
160
161 static void
162 nv50_screen_destroy(struct pipe_screen *pscreen)
163 {
164 struct nv50_screen *screen = nv50_screen(pscreen);
165
166 nouveau_notifier_free(&screen->sync);
167 nouveau_grobj_free(&screen->tesla);
168 nouveau_grobj_free(&screen->eng2d);
169 nouveau_grobj_free(&screen->m2mf);
170 nouveau_screen_fini(&screen->base);
171 FREE(screen);
172 }
173
174 struct pipe_screen *
175 nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
176 {
177 struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);
178 struct nouveau_channel *chan;
179 struct pipe_screen *pscreen;
180 struct nouveau_stateobj *so;
181 unsigned chipset = dev->chipset;
182 unsigned tesla_class = 0;
183 int ret, i;
184
185 if (!screen)
186 return NULL;
187 pscreen = &screen->base.base;
188
189 ret = nouveau_screen_init(&screen->base, dev);
190 if (ret) {
191 nv50_screen_destroy(pscreen);
192 return NULL;
193 }
194 chan = screen->base.channel;
195
196 pscreen->winsys = ws;
197 pscreen->destroy = nv50_screen_destroy;
198 pscreen->get_param = nv50_screen_get_param;
199 pscreen->get_paramf = nv50_screen_get_paramf;
200 pscreen->is_format_supported = nv50_screen_is_format_supported;
201
202 nv50_screen_init_miptree_functions(pscreen);
203 nv50_transfer_init_screen_functions(pscreen);
204
205 /* DMA engine object */
206 ret = nouveau_grobj_alloc(chan, 0xbeef5039,
207 NV50_MEMORY_TO_MEMORY_FORMAT, &screen->m2mf);
208 if (ret) {
209 NOUVEAU_ERR("Error creating M2MF object: %d\n", ret);
210 nv50_screen_destroy(pscreen);
211 return NULL;
212 }
213 BIND_RING(chan, screen->m2mf, 1);
214
215 /* 2D object */
216 ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d);
217 if (ret) {
218 NOUVEAU_ERR("Error creating 2D object: %d\n", ret);
219 nv50_screen_destroy(pscreen);
220 return NULL;
221 }
222 BIND_RING(chan, screen->eng2d, 2);
223
224 /* 3D object */
225 switch (chipset & 0xf0) {
226 case 0x50:
227 tesla_class = NV50TCL;
228 break;
229 case 0x80:
230 case 0x90:
231 /* this stupid name should be corrected. */
232 tesla_class = NV54TCL;
233 break;
234 case 0xa0:
235 switch (chipset) {
236 case 0xa0:
237 case 0xaa:
238 case 0xac:
239 tesla_class = NVA0TCL;
240 break;
241 default:
242 tesla_class = 0x8597;
243 break;
244 }
245 break;
246 default:
247 NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", chipset);
248 nv50_screen_destroy(pscreen);
249 return NULL;
250 }
251
252 ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class,
253 &screen->tesla);
254 if (ret) {
255 NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
256 nv50_screen_destroy(pscreen);
257 return NULL;
258 }
259 BIND_RING(chan, screen->tesla, 3);
260
261 /* Sync notifier */
262 ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
263 if (ret) {
264 NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
265 nv50_screen_destroy(pscreen);
266 return NULL;
267 }
268
269 /* Static M2MF init */
270 so = so_new(32, 0);
271 so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3);
272 so_data (so, screen->sync->handle);
273 so_data (so, chan->vram->handle);
274 so_data (so, chan->vram->handle);
275 so_emit(chan, so);
276 so_ref (NULL, &so);
277
278 /* Static 2D init */
279 so = so_new(64, 0);
280 so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4);
281 so_data (so, screen->sync->handle);
282 so_data (so, chan->vram->handle);
283 so_data (so, chan->vram->handle);
284 so_data (so, chan->vram->handle);
285 so_method(so, screen->eng2d, NV50_2D_OPERATION, 1);
286 so_data (so, NV50_2D_OPERATION_SRCCOPY);
287 so_method(so, screen->eng2d, 0x0290, 1);
288 so_data (so, 0);
289 so_method(so, screen->eng2d, 0x0888, 1);
290 so_data (so, 1);
291 so_emit(chan, so);
292 so_ref(NULL, &so);
293
294 /* Static tesla init */
295 so = so_new(256, 20);
296
297 so_method(so, screen->tesla, 0x1558, 1);
298 so_data (so, 1);
299 so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
300 so_data (so, screen->sync->handle);
301 so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0),
302 NV50TCL_DMA_UNK0__SIZE);
303 for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++)
304 so_data(so, chan->vram->handle);
305 so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0),
306 NV50TCL_DMA_UNK1__SIZE);
307 for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++)
308 so_data(so, chan->vram->handle);
309 so_method(so, screen->tesla, 0x121c, 1);
310 so_data (so, 1);
311
312 /* try to activate all/more lanes (threads) in a warp */
313 so_method(so, screen->tesla, 0x1400, 1);
314 so_data (so, 0xf);
315
316 so_method(so, screen->tesla, 0x13bc, 1);
317 so_data (so, 0x54);
318 /* origin is top left (set to 1 for bottom left) */
319 so_method(so, screen->tesla, 0x13ac, 1);
320 so_data (so, 0);
321 so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
322 so_data (so, 8);
323
324 /* constant buffers for immediates and VP/FP parameters */
325 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4,
326 &screen->constbuf_misc[0]);
327 if (ret) {
328 nv50_screen_destroy(pscreen);
329 return NULL;
330 }
331
332 for (i = 0; i < 2; i++) {
333 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4,
334 &screen->constbuf_parm[i]);
335 if (ret) {
336 nv50_screen_destroy(pscreen);
337 return NULL;
338 }
339 }
340
341 if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) ||
342 nouveau_resource_init(&screen->parm_heap[0], 0, 512) ||
343 nouveau_resource_init(&screen->parm_heap[1], 0, 512))
344 {
345 NOUVEAU_ERR("Error initialising constant buffers.\n");
346 nv50_screen_destroy(pscreen);
347 return NULL;
348 }
349
350 /*
351 // map constant buffers:
352 // B = buffer ID (maybe more than 1 byte)
353 // N = CB index used in shader instruction
354 // P = program type (0 = VP, 2 = GP, 3 = FP)
355 so_method(so, screen->tesla, 0x1694, 1);
356 so_data (so, 0x000BBNP1);
357 */
358
359 so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
360 so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
361 NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
362 so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
363 NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
364 so_data (so, (NV50_CB_PMISC << 16) | 0x00000200);
365 so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
366 so_data (so, 0x00000001 | (NV50_CB_PMISC << 12));
367 so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
368 so_data (so, 0x00000031 | (NV50_CB_PMISC << 12));
369
370 so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
371 so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM |
372 NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
373 so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM |
374 NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
375 so_data (so, (NV50_CB_PVP << 16) | 0x00000800);
376 so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
377 so_data (so, 0x00000101 | (NV50_CB_PVP << 12));
378
379 so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
380 so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM |
381 NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
382 so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM |
383 NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
384 so_data (so, (NV50_CB_PFP << 16) | 0x00000800);
385 so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
386 so_data (so, 0x00000131 | (NV50_CB_PFP << 12));
387
388 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tic);
389 if (ret) {
390 nv50_screen_destroy(pscreen);
391 return NULL;
392 }
393
394 so_method(so, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3);
395 so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
396 NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
397 so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
398 NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
399 so_data (so, 0x000007ff);
400
401 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tsc);
402 if (ret) {
403 nv50_screen_destroy(pscreen);
404 return NULL;
405 }
406
407 so_method(so, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3);
408 so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
409 NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
410 so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
411 NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
412 so_data (so, 0x00000000);
413
414
415 /* Vertex array limits - max them out */
416 for (i = 0; i < 16; i++) {
417 so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2);
418 so_data (so, 0x000000ff);
419 so_data (so, 0xffffffff);
420 }
421
422 so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2);
423 so_data (so, fui(0.0));
424 so_data (so, fui(1.0));
425
426 so_method(so, screen->tesla, 0x1234, 1);
427 so_data (so, 1);
428
429 /* activate first scissor rectangle */
430 so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1);
431 so_data (so, 1);
432
433 so_emit(chan, so);
434 so_ref (so, &screen->static_init);
435 so_ref (NULL, &so);
436 nouveau_pushbuf_flush(chan, 0);
437
438 return pscreen;
439 }
440