/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "pipe/p_defines.h"
#include "util/u_framebuffer.h"
#include "util/u_upload_mgr.h"

#include "nv50/nv50_context.h"
#include "nv50/nv50_screen.h"
#include "nv50/nv50_resource.h"

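/* pipe_context::flush: hand the caller a reference to the screen's current
 * fence if requested, then kick the pushbuf so all queued commands are
 * submitted. */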
static void
nv50_flush(struct pipe_context *pipe,
           struct pipe_fence_handle **fence,
           unsigned flags)
{
   struct nouveau_screen *screen = nouveau_screen(pipe->screen);

   if (fence)
      nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);

   PUSH_KICK(screen->pushbuf);

   nouveau_context_update_frame_stats(nouveau_context(pipe));
}

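/* Serialize the 3D engine and flush the texture cache so that texture
 * fetches issued after the barrier see previously rendered data. */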
static void
nv50_texture_barrier(struct pipe_context *pipe, unsigned flags)
{
   struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;

   BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
   PUSH_DATA (push, 0x20);
}

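/* For PIPE_BARRIER_MAPPED_BUFFER, check whether any bound vertex, index or
 * constant buffer is persistently mapped; if so, mark the corresponding
 * state dirty so it is re-validated before the next draw. */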
static void
nv50_memory_barrier(struct pipe_context *pipe, unsigned flags)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   int i, s;

   if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
      for (i = 0; i < nv50->num_vtxbufs; ++i) {
         if (!nv50->vtxbuf[i].buffer)
            continue;
         if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
            nv50->base.vbo_dirty = true;
      }

      if (nv50->idxbuf.buffer &&
          nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
         nv50->base.vbo_dirty = true;

      for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
         uint32_t valid = nv50->constbuf_valid[s];

         while (valid && !nv50->cb_dirty) {
            const unsigned i = ffs(valid) - 1;
            struct pipe_resource *res;

            valid &= ~(1 << i);
            if (nv50->constbuf[s][i].user)
               continue;

            res = nv50->constbuf[s][i].u.buf;
            if (!res)
               continue;

            if (res->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
               nv50->cb_dirty = true;
         }
      }
   }
}

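/* Emit a debug string marker into the command stream as payload of a NOP
 * method: the string is packed four bytes per dword and truncated to at
 * most NV04_PFIFO_MAX_PACKET_LEN dwords. */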
static void
nv50_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
{
   struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
   int string_words = len / 4;
   int data_words;

   if (len <= 0)
      return;
   string_words = MIN2(string_words, NV04_PFIFO_MAX_PACKET_LEN);
   if (string_words == NV04_PFIFO_MAX_PACKET_LEN)
      data_words = string_words;
   else
      data_words = string_words + !!(len & 3);
   BEGIN_NI04(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
   if (string_words)
      PUSH_DATAp(push, str, string_words);
   if (string_words != data_words) {
      int data = 0;
      memcpy(&data, &str[string_words * 4], len & 3);
      PUSH_DATA (push, data);
   }
}

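/* Default kick_notify callback, invoked when the pushbuf is submitted:
 * advances and updates the screen's fences and notes that the current
 * context's state has been flushed. */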
void
nv50_default_kick_notify(struct nouveau_pushbuf *push)
{
   struct nv50_screen *screen = push->user_priv;

   if (screen) {
      nouveau_fence_next(&screen->base);
      nouveau_fence_update(&screen->base, true);
      if (screen->cur_ctx)
         screen->cur_ctx->state.flushed = true;
   }
}

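/* Drop every resource reference still held by the context: buffer contexts,
 * framebuffer state, vertex/index buffers, sampler views, constant buffers
 * and compute global residents. */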
static void
nv50_context_unreference_resources(struct nv50_context *nv50)
{
   unsigned s, i;

   nouveau_bufctx_del(&nv50->bufctx_3d);
   nouveau_bufctx_del(&nv50->bufctx);
   nouveau_bufctx_del(&nv50->bufctx_cp);

   util_unreference_framebuffer_state(&nv50->framebuffer);

   assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
   for (i = 0; i < nv50->num_vtxbufs; ++i)
      pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL);

   pipe_resource_reference(&nv50->idxbuf.buffer, NULL);

   for (s = 0; s < 3; ++s) {
      assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
      for (i = 0; i < nv50->num_textures[s]; ++i)
         pipe_sampler_view_reference(&nv50->textures[s][i], NULL);

      for (i = 0; i < NV50_MAX_PIPE_CONSTBUFS; ++i)
         if (!nv50->constbuf[s][i].user)
            pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL);
   }

   for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
        ++i) {
      struct pipe_resource **res = util_dynarray_element(
         &nv50->global_residents, struct pipe_resource *, i);
      pipe_resource_reference(res, NULL);
   }
   util_dynarray_fini(&nv50->global_residents);
}

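/* Context destruction: save the current hardware state for a possible future
 * context, flush outstanding commands, then release all references. */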
static void
nv50_destroy(struct pipe_context *pipe)
{
   struct nv50_context *nv50 = nv50_context(pipe);

   if (nv50->screen->cur_ctx == nv50) {
      nv50->screen->cur_ctx = NULL;
      /* Save off the state in case another context gets created */
      nv50->screen->save_state = nv50->state;
   }

   if (nv50->base.pipe.stream_uploader)
      u_upload_destroy(nv50->base.pipe.stream_uploader);

   nouveau_pushbuf_bufctx(nv50->base.pushbuf, NULL);
   nouveau_pushbuf_kick(nv50->base.pushbuf, nv50->base.pushbuf->channel);

   nv50_context_unreference_resources(nv50);

   FREE(nv50->blit);

   nouveau_context_destroy(&nv50->base);
}

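/* Called when a resource's backing storage is replaced: flag every binding
 * that references it dirty and reset the matching bufctx bins.  'ref' is the
 * number of known references; once it reaches zero we can stop scanning. */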
static int
nv50_invalidate_resource_storage(struct nouveau_context *ctx,
                                 struct pipe_resource *res,
                                 int ref)
{
   struct nv50_context *nv50 = nv50_context(&ctx->pipe);
   unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
   unsigned s, i;

   if (bind & PIPE_BIND_RENDER_TARGET) {
      assert(nv50->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS);
      for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
         if (nv50->framebuffer.cbufs[i] &&
             nv50->framebuffer.cbufs[i]->texture == res) {
            nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
            nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
            if (!--ref)
               return ref;
         }
      }
   }
   if (bind & PIPE_BIND_DEPTH_STENCIL) {
      if (nv50->framebuffer.zsbuf &&
          nv50->framebuffer.zsbuf->texture == res) {
         nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
         if (!--ref)
            return ref;
      }
   }

   if (bind & (PIPE_BIND_VERTEX_BUFFER |
               PIPE_BIND_INDEX_BUFFER |
               PIPE_BIND_CONSTANT_BUFFER |
               PIPE_BIND_STREAM_OUTPUT |
               PIPE_BIND_SAMPLER_VIEW)) {

      assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
      for (i = 0; i < nv50->num_vtxbufs; ++i) {
         if (nv50->vtxbuf[i].buffer == res) {
            nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;
            nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX);
            if (!--ref)
               return ref;
         }
      }

      if (nv50->idxbuf.buffer == res) {
         /* Just rebind to the bufctx as there is no separate dirty bit */
         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);
         BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(res), RD);
         if (!--ref)
            return ref;
      }

      for (s = 0; s < 3; ++s) {
         assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
         for (i = 0; i < nv50->num_textures[s]; ++i) {
            if (nv50->textures[s][i] &&
                nv50->textures[s][i]->texture == res) {
               nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
               nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
               if (!--ref)
                  return ref;
            }
         }
      }

      for (s = 0; s < 3; ++s) {
         for (i = 0; i < NV50_MAX_PIPE_CONSTBUFS; ++i) {
            if (!(nv50->constbuf_valid[s] & (1 << i)))
               continue;
            if (!nv50->constbuf[s][i].user &&
                nv50->constbuf[s][i].u.buf == res) {
               nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
               nv50->constbuf_dirty[s] |= 1 << i;
               nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
               if (!--ref)
                  return ref;
            }
         }
      }
   }

   return ref;
}

static void
nv50_context_get_sample_position(struct pipe_context *, unsigned, unsigned,
                                 float *);

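/* Create an nv50 pipe_context: allocate the buffer contexts, hook up draw,
 * state, query, surface and resource functions, select a video decoding
 * path based on the chipset, and pin the screen's global buffers into the
 * 3D and compute buffer contexts. */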
struct pipe_context *
nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
{
   struct nv50_screen *screen = nv50_screen(pscreen);
   struct nv50_context *nv50;
   struct pipe_context *pipe;
   int ret;
   uint32_t flags;

   nv50 = CALLOC_STRUCT(nv50_context);
   if (!nv50)
      return NULL;
   pipe = &nv50->base.pipe;

   if (!nv50_blitctx_create(nv50))
      goto out_err;

   nv50->base.pushbuf = screen->base.pushbuf;
   nv50->base.client = screen->base.client;

   ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
   if (!ret)
      ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_3D_COUNT,
                               &nv50->bufctx_3d);
   if (!ret)
      ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_CP_COUNT,
                               &nv50->bufctx_cp);
   if (ret)
      goto out_err;

   nv50->base.screen = &screen->base;
   nv50->base.copy_data = nv50_m2mf_copy_linear;
   nv50->base.push_data = nv50_sifc_linear_u8;
   nv50->base.push_cb = nv50_cb_push;

   nv50->screen = screen;
   pipe->screen = pscreen;
   pipe->priv = priv;
   pipe->stream_uploader = u_upload_create_default(pipe);
   if (!pipe->stream_uploader)
      goto out_err;
   pipe->const_uploader = pipe->stream_uploader;

   pipe->destroy = nv50_destroy;

   pipe->draw_vbo = nv50_draw_vbo;
   pipe->clear = nv50_clear;
   pipe->launch_grid = nv50_launch_grid;

   pipe->flush = nv50_flush;
   pipe->texture_barrier = nv50_texture_barrier;
   pipe->memory_barrier = nv50_memory_barrier;
   pipe->get_sample_position = nv50_context_get_sample_position;
   pipe->emit_string_marker = nv50_emit_string_marker;

   if (!screen->cur_ctx) {
      /* Restore the last context's state here, normally handled during
       * context switch
       */
      nv50->state = screen->save_state;
      screen->cur_ctx = nv50;
      nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx);
   }
   nv50->base.pushbuf->kick_notify = nv50_default_kick_notify;

   nouveau_context_init(&nv50->base);
   nv50_init_query_functions(nv50);
   nv50_init_surface_functions(nv50);
   nv50_init_state_functions(nv50);
   nv50_init_resource_functions(pipe);

   nv50->base.invalidate_resource_storage = nv50_invalidate_resource_storage;

   if (screen->base.device->chipset < 0x84 ||
       debug_get_bool_option("NOUVEAU_PMPEG", false)) {
      /* PMPEG */
      nouveau_context_init_vdec(&nv50->base);
   } else if (screen->base.device->chipset < 0x98 ||
              screen->base.device->chipset == 0xa0) {
      /* VP2 */
      pipe->create_video_codec = nv84_create_decoder;
      pipe->create_video_buffer = nv84_video_buffer_create;
   } else {
      /* VP3/4 */
      pipe->create_video_codec = nv98_create_decoder;
      pipe->create_video_buffer = nv98_video_buffer_create;
   }

   flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;

   BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->code);
   BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->uniforms);
   BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->txc);
   BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->stack_bo);
   if (screen->compute) {
      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code);
      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc);
      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo);
   }

   flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;

   BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->fence.bo);
   BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
   if (screen->compute)
      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);

   nv50->base.scratch.bo_size = 2 << 20;

   util_dynarray_init(&nv50->global_residents);

   return pipe;

out_err:
   if (pipe->stream_uploader)
      u_upload_destroy(pipe->stream_uploader);
   if (nv50->bufctx_3d)
      nouveau_bufctx_del(&nv50->bufctx_3d);
   if (nv50->bufctx_cp)
      nouveau_bufctx_del(&nv50->bufctx_cp);
   if (nv50->bufctx)
      nouveau_bufctx_del(&nv50->bufctx);
   FREE(nv50->blit);
   FREE(nv50);
   return NULL;
}

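/* Walk a bufctx buffer list (the 'current' list when called on flush,
 * otherwise the 'pending' one) and validate each referenced resource with
 * the access flags stored in priv_data. */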
void
nv50_bufctx_fence(struct nouveau_bufctx *bufctx, bool on_flush)
{
   struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
   struct nouveau_list *it;

   for (it = list->next; it != list; it = it->next) {
      struct nouveau_bufref *ref = (struct nouveau_bufref *)it;
      struct nv04_resource *res = ref->priv;
      if (res)
         nv50_resource_validate(res, (unsigned)ref->priv_data);
   }
}

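/* Return the sub-pixel position of a sample for the given MSAA sample count,
 * using the fixed nv50 sample layouts in units of 1/16th of a pixel. */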
static void
nv50_context_get_sample_position(struct pipe_context *pipe,
                                 unsigned sample_count, unsigned sample_index,
                                 float *xy)
{
   static const uint8_t ms1[1][2] = { { 0x8, 0x8 } };
   static const uint8_t ms2[2][2] = {
      { 0x4, 0x4 }, { 0xc, 0xc } }; /* surface coords (0,0), (1,0) */
   static const uint8_t ms4[4][2] = {
      { 0x6, 0x2 }, { 0xe, 0x6 }, /* (0,0), (1,0) */
      { 0x2, 0xa }, { 0xa, 0xe } }; /* (0,1), (1,1) */
   static const uint8_t ms8[8][2] = {
      { 0x1, 0x7 }, { 0x5, 0x3 }, /* (0,0), (1,0) */
      { 0x3, 0xd }, { 0x7, 0xb }, /* (0,1), (1,1) */
      { 0x9, 0x5 }, { 0xf, 0x1 }, /* (2,0), (3,0) */
      { 0xb, 0xf }, { 0xd, 0x9 } }; /* (2,1), (3,1) */
#if 0
   /* NOTE: there are alternative modes for MS2 and MS8, currently not used */
   static const uint8_t ms8_alt[8][2] = {
      { 0x9, 0x5 }, { 0x7, 0xb }, /* (2,0), (1,1) */
      { 0xd, 0x9 }, { 0x5, 0x3 }, /* (3,1), (1,0) */
      { 0x3, 0xd }, { 0x1, 0x7 }, /* (0,1), (0,0) */
      { 0xb, 0xf }, { 0xf, 0x1 } }; /* (2,1), (3,0) */
#endif

   const uint8_t (*ptr)[2];

   switch (sample_count) {
   case 0:
   case 1: ptr = ms1; break;
   case 2: ptr = ms2; break;
   case 4: ptr = ms4; break;
   case 8: ptr = ms8; break;
   default:
      assert(0);
      return; /* bad sample count -> undefined locations */
   }
   xy[0] = ptr[sample_index][0] * 0.0625f;
   xy[1] = ptr[sample_index][1] * 0.0625f;
}