060f59d0c73d7b69273d888d19db68b2e7475ee6
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_compute.c
1 /*
2 * Copyright 2013 Nouveau Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Christoph Bumiller, Samuel Pitoiset
23 */
24
25 #include "nvc0/nvc0_context.h"
26
27 #include "nvc0/nvc0_compute.xml.h"
28
29 int
30 nvc0_screen_compute_setup(struct nvc0_screen *screen,
31 struct nouveau_pushbuf *push)
32 {
33 struct nouveau_object *chan = screen->base.channel;
34 struct nouveau_device *dev = screen->base.device;
35 uint32_t obj_class;
36 int ret;
37 int i;
38
39 switch (dev->chipset & ~0xf) {
40 case 0xc0:
41 case 0xd0:
42 /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,
43 * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */
44 obj_class = NVC0_COMPUTE_CLASS;
45 break;
46 default:
47 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
48 return -1;
49 }
50
51 ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
52 &screen->compute);
53 if (ret) {
54 NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
55 return ret;
56 }
57
58 ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,
59 &screen->parm);
60 if (ret)
61 return ret;
62
63 BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
64 PUSH_DATA (push, screen->compute->oclass);
65
66 /* hardware limit */
67 BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);
68 PUSH_DATA (push, screen->mp_count);
69 BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);
70 PUSH_DATA (push, 0xf);
71
72 BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);
73 PUSH_DATA (push, 0x8000);
74
75 /* global memory setup */
76 BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
77 PUSH_DATA (push, 0);
78 BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);
79 for (i = 0; i <= 0xff; i++)
80 PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
81 BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
82 PUSH_DATA (push, 1);
83
84 /* local memory and cstack setup */
85 BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);
86 PUSH_DATAh(push, screen->tls->offset);
87 PUSH_DATA (push, screen->tls->offset);
88 BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);
89 PUSH_DATAh(push, screen->tls->size);
90 PUSH_DATA (push, screen->tls->size);
91 BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);
92 PUSH_DATA (push, 0);
93 BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);
94 PUSH_DATA (push, 0xff << 24);
95
96 /* shared memory setup */
97 BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);
98 PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
99 BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);
100 PUSH_DATA (push, 0xfe << 24);
101 BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);
102 PUSH_DATA (push, 0);
103
104 /* code segment setup */
105 BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
106 PUSH_DATAh(push, screen->text->offset);
107 PUSH_DATA (push, screen->text->offset);
108
109 /* textures */
110 BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);
111 PUSH_DATAh(push, screen->txc->offset);
112 PUSH_DATA (push, screen->txc->offset);
113 PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
114
115 /* samplers */
116 BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);
117 PUSH_DATAh(push, screen->txc->offset + 65536);
118 PUSH_DATA (push, screen->txc->offset + 65536);
119 PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
120
121 return 0;
122 }
123
124 static void
125 nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
126 {
127 bool need_flush = nvc0_validate_tsc(nvc0, 5);
128 if (need_flush) {
129 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);
130 PUSH_DATA (nvc0->base.pushbuf, 0);
131 }
132 }
133
134 static void
135 nvc0_compute_validate_textures(struct nvc0_context *nvc0)
136 {
137 bool need_flush = nvc0_validate_tic(nvc0, 5);
138 if (need_flush) {
139 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);
140 PUSH_DATA (nvc0->base.pushbuf, 0);
141 }
142 }
143
144 static void
145 nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
146 {
147 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
148 const int s = 5;
149
150 while (nvc0->constbuf_dirty[s]) {
151 int i = ffs(nvc0->constbuf_dirty[s]) - 1;
152 nvc0->constbuf_dirty[s] &= ~(1 << i);
153
154 if (nvc0->constbuf[s][i].user) {
155 struct nouveau_bo *bo = nvc0->screen->uniform_bo;
156 const unsigned base = s << 16;
157 const unsigned size = nvc0->constbuf[s][0].size;
158 assert(i == 0); /* we really only want OpenGL uniforms here */
159 assert(nvc0->constbuf[s][0].u.data);
160
161 if (nvc0->state.uniform_buffer_bound[s] < size) {
162 nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
163
164 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
165 PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
166 PUSH_DATAh(push, bo->offset + base);
167 PUSH_DATA (push, bo->offset + base);
168 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
169 PUSH_DATA (push, (0 << 8) | 1);
170 }
171 nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
172 base, nvc0->state.uniform_buffer_bound[s],
173 0, (size + 3) / 4,
174 nvc0->constbuf[s][0].u.data);
175 } else {
176 struct nv04_resource *res =
177 nv04_resource(nvc0->constbuf[s][i].u.buf);
178 if (res) {
179 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
180 PUSH_DATA (push, nvc0->constbuf[s][i].size);
181 PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
182 PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
183 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
184 PUSH_DATA (push, (i << 8) | 1);
185
186 BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
187
188 res->cb_bindings[s] |= 1 << i;
189 } else {
190 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
191 PUSH_DATA (push, (i << 8) | 0);
192 }
193 if (i == 0)
194 nvc0->state.uniform_buffer_bound[s] = 0;
195 }
196 }
197
198 BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
199 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
200 }
201
202 static void
203 nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
204 {
205 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
206 struct nvc0_screen *screen = nvc0->screen;
207
208 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
209 PUSH_DATA (push, 1024);
210 PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
211 PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
212 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
213 PUSH_DATA (push, (15 << 8) | 1);
214
215 nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST;
216 }
217
218 static void
219 nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
220 {
221 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
222 const int s = 5;
223 int i;
224
225 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
226 PUSH_DATA (push, 1024);
227 PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
228 PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
229 BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
230 PUSH_DATA (push, 512);
231
232 for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
233 if (nvc0->buffers[s][i].buffer) {
234 struct nv04_resource *res =
235 nv04_resource(nvc0->buffers[s][i].buffer);
236 PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
237 PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
238 PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
239 PUSH_DATA (push, 0);
240 BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
241 } else {
242 PUSH_DATA (push, 0);
243 PUSH_DATA (push, 0);
244 PUSH_DATA (push, 0);
245 PUSH_DATA (push, 0);
246 }
247 }
248 }
249
250 void
251 nvc0_compute_validate_globals(struct nvc0_context *nvc0)
252 {
253 unsigned i;
254
255 for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
256 ++i) {
257 struct pipe_resource *res = *util_dynarray_element(
258 &nvc0->global_residents, struct pipe_resource *, i);
259 if (res)
260 nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL,
261 nv04_resource(res), NOUVEAU_BO_RDWR);
262 }
263 }
264
265 static bool
266 nvc0_compute_state_validate(struct nvc0_context *nvc0)
267 {
268 nvc0_compprog_validate(nvc0);
269 if (nvc0->dirty_cp & NVC0_NEW_CP_CONSTBUF)
270 nvc0_compute_validate_constbufs(nvc0);
271 if (nvc0->dirty_cp & NVC0_NEW_CP_DRIVERCONST)
272 nvc0_compute_validate_driverconst(nvc0);
273 if (nvc0->dirty_cp & NVC0_NEW_CP_BUFFERS)
274 nvc0_compute_validate_buffers(nvc0);
275 if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
276 nvc0_compute_validate_textures(nvc0);
277 if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
278 nvc0_compute_validate_samplers(nvc0);
279 if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS)
280 nvc0_compute_validate_globals(nvc0);
281
282 /* TODO: surfaces */
283
284 nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
285
286 nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
287 if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
288 return false;
289 if (unlikely(nvc0->state.flushed))
290 nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
291
292 return true;
293
294 }
295
296 static void
297 nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
298 {
299 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
300 struct nvc0_screen *screen = nvc0->screen;
301 struct nvc0_program *cp = nvc0->compprog;
302
303 if (cp->parm_size) {
304 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
305 PUSH_DATA (push, align(cp->parm_size, 0x100));
306 PUSH_DATAh(push, screen->parm->offset);
307 PUSH_DATA (push, screen->parm->offset);
308 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
309 PUSH_DATA (push, (0 << 8) | 1);
310 /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
311 BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
312 PUSH_DATA (push, 0);
313 PUSH_DATAp(push, input, cp->parm_size / 4);
314
315 BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
316 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
317 }
318 }
319
320 void
321 nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
322 {
323 struct nvc0_context *nvc0 = nvc0_context(pipe);
324 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
325 struct nvc0_program *cp = nvc0->compprog;
326 unsigned s;
327 int ret;
328
329 ret = !nvc0_compute_state_validate(nvc0);
330 if (ret) {
331 NOUVEAU_ERR("Failed to launch grid !\n");
332 return;
333 }
334
335 nvc0_compute_upload_input(nvc0, info->input);
336
337 BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
338 PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
339
340 BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
341 PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
342 PUSH_DATA (push, 0);
343 PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
344
345 BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);
346 PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
347 PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
348 PUSH_DATA (push, cp->num_barriers);
349 BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);
350 PUSH_DATA (push, cp->num_gprs);
351
352 /* launch preliminary setup */
353 BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);
354 PUSH_DATA (push, 0x1);
355 BEGIN_NVC0(push, SUBC_CP(0x036c), 1);
356 PUSH_DATA (push, 0);
357 BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
358 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
359
360 /* block setup */
361 BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);
362 PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
363 PUSH_DATA (push, info->block[2]);
364
365 if (unlikely(info->indirect)) {
366 struct nv04_resource *res = nv04_resource(info->indirect);
367 uint32_t offset = res->offset + info->indirect_offset;
368 unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;
369
370 nouveau_pushbuf_space(push, 16, 0, 1);
371 PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
372 PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
373 nouveau_pushbuf_data(push, res->bo, offset,
374 NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
375 } else {
376 /* grid setup */
377 BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);
378 PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
379 PUSH_DATA (push, info->grid[2]);
380
381 /* kernel launching */
382 BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);
383 PUSH_DATA (push, 0);
384 BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);
385 PUSH_DATA (push, 0);
386 BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);
387 PUSH_DATA (push, 0x1000);
388 BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);
389 PUSH_DATA (push, 0);
390 BEGIN_NVC0(push, SUBC_CP(0x0360), 1);
391 PUSH_DATA (push, 0x1);
392 }
393
394 /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
395 nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
396 for (s = 0; s < 5; s++) {
397 nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
398 nvc0->state.uniform_buffer_bound[s] = 0;
399 }
400 }