nvc0: bind textures/samplers for compute on Fermi
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_compute.c
1 /*
2 * Copyright 2013 Nouveau Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Christoph Bumiller, Samuel Pitoiset
23 */
24
25 #include "nvc0/nvc0_context.h"
26 #include "nvc0/nvc0_compute.h"
27
28 int
29 nvc0_screen_compute_setup(struct nvc0_screen *screen,
30 struct nouveau_pushbuf *push)
31 {
32 struct nouveau_object *chan = screen->base.channel;
33 struct nouveau_device *dev = screen->base.device;
34 uint32_t obj_class;
35 int ret;
36 int i;
37
38 switch (dev->chipset & ~0xf) {
39 case 0xc0:
40 case 0xd0:
41 /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,
42 * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */
43 obj_class = NVC0_COMPUTE_CLASS;
44 break;
45 default:
46 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
47 return -1;
48 }
49
50 ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
51 &screen->compute);
52 if (ret) {
53 NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
54 return ret;
55 }
56
57 ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,
58 &screen->parm);
59 if (ret)
60 return ret;
61
62 BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
63 PUSH_DATA (push, screen->compute->oclass);
64
65 /* hardware limit */
66 BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
67 PUSH_DATA (push, screen->mp_count);
68 BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
69 PUSH_DATA (push, 0xf);
70
71 BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
72 PUSH_DATA (push, 0x8000);
73
74 /* global memory setup */
75 BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
76 PUSH_DATA (push, 0);
77 BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
78 for (i = 0; i <= 0xff; i++)
79 PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
80 BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
81 PUSH_DATA (push, 1);
82
83 /* local memory and cstack setup */
84 BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
85 PUSH_DATAh(push, screen->tls->offset);
86 PUSH_DATA (push, screen->tls->offset);
87 BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
88 PUSH_DATAh(push, screen->tls->size);
89 PUSH_DATA (push, screen->tls->size);
90 BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
91 PUSH_DATA (push, 0);
92 BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
93 PUSH_DATA (push, 1 << 24);
94
95 /* shared memory setup */
96 BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
97 PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
98 BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
99 PUSH_DATA (push, 2 << 24);
100 BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
101 PUSH_DATA (push, 0);
102
103 /* code segment setup */
104 BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
105 PUSH_DATAh(push, screen->text->offset);
106 PUSH_DATA (push, screen->text->offset);
107
108 /* textures */
109 BEGIN_NVC0(push, NVC0_COMPUTE(TIC_ADDRESS_HIGH), 3);
110 PUSH_DATAh(push, screen->txc->offset);
111 PUSH_DATA (push, screen->txc->offset);
112 PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
113
114 /* samplers */
115 BEGIN_NVC0(push, NVC0_COMPUTE(TSC_ADDRESS_HIGH), 3);
116 PUSH_DATAh(push, screen->txc->offset + 65536);
117 PUSH_DATA (push, screen->txc->offset + 65536);
118 PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
119
120 return 0;
121 }
122
123 bool
124 nvc0_compute_validate_program(struct nvc0_context *nvc0)
125 {
126 struct nvc0_program *prog = nvc0->compprog;
127
128 if (prog->mem)
129 return true;
130
131 if (!prog->translated) {
132 prog->translated = nvc0_program_translate(
133 prog, nvc0->screen->base.device->chipset, &nvc0->base.debug);
134 if (!prog->translated)
135 return false;
136 }
137 if (unlikely(!prog->code_size))
138 return false;
139
140 if (likely(prog->code_size)) {
141 if (nvc0_program_upload_code(nvc0, prog)) {
142 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
143 BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
144 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
145 return true;
146 }
147 }
148 return false;
149 }
150
151 static void
152 nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
153 {
154 bool need_flush = nvc0_validate_tsc(nvc0, 5);
155 if (need_flush) {
156 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_COMPUTE(TSC_FLUSH), 1);
157 PUSH_DATA (nvc0->base.pushbuf, 0);
158 }
159 }
160
161 static void
162 nvc0_compute_validate_textures(struct nvc0_context *nvc0)
163 {
164 bool need_flush = nvc0_validate_tic(nvc0, 5);
165 if (need_flush) {
166 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_COMPUTE(TIC_FLUSH), 1);
167 PUSH_DATA (nvc0->base.pushbuf, 0);
168 }
169 }
170
171 static void
172 nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
173 {
174 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
175 const int s = 5;
176
177 while (nvc0->constbuf_dirty[s]) {
178 int i = ffs(nvc0->constbuf_dirty[s]) - 1;
179 nvc0->constbuf_dirty[s] &= ~(1 << i);
180
181 if (nvc0->constbuf[s][i].user) {
182 struct nouveau_bo *bo = nvc0->screen->uniform_bo;
183 const unsigned base = s << 16;
184 const unsigned size = nvc0->constbuf[s][0].size;
185 assert(i == 0); /* we really only want OpenGL uniforms here */
186 assert(nvc0->constbuf[s][0].u.data);
187
188 if (nvc0->state.uniform_buffer_bound[s] < size) {
189 nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
190
191 BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
192 PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
193 PUSH_DATAh(push, bo->offset + base);
194 PUSH_DATA (push, bo->offset + base);
195 BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
196 PUSH_DATA (push, (0 << 8) | 1);
197 }
198 nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
199 base, nvc0->state.uniform_buffer_bound[s],
200 0, (size + 3) / 4,
201 nvc0->constbuf[s][0].u.data);
202 } else {
203 struct nv04_resource *res =
204 nv04_resource(nvc0->constbuf[s][i].u.buf);
205 if (res) {
206 BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
207 PUSH_DATA (push, nvc0->constbuf[s][i].size);
208 PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
209 PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
210 BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
211 PUSH_DATA (push, (i << 8) | 1);
212
213 BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
214
215 res->cb_bindings[s] |= 1 << i;
216 } else {
217 BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
218 PUSH_DATA (push, (i << 8) | 0);
219 }
220 if (i == 0)
221 nvc0->state.uniform_buffer_bound[s] = 0;
222 }
223 }
224
225 BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
226 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
227 }
228
229 static void
230 nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
231 {
232 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
233 struct nvc0_screen *screen = nvc0->screen;
234
235 BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
236 PUSH_DATA (push, 1024);
237 PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
238 PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
239 BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
240 PUSH_DATA (push, (15 << 8) | 1);
241
242 nvc0->dirty |= NVC0_NEW_DRIVERCONST;
243 }
244
245 static void
246 nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
247 {
248 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
249 const int s = 5;
250 int i;
251
252 BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
253 PUSH_DATA (push, 1024);
254 PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
255 PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
256 BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
257 PUSH_DATA (push, 512);
258
259 for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
260 if (nvc0->buffers[s][i].buffer) {
261 struct nv04_resource *res =
262 nv04_resource(nvc0->buffers[s][i].buffer);
263 PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
264 PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
265 PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
266 PUSH_DATA (push, 0);
267 BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
268 } else {
269 PUSH_DATA (push, 0);
270 PUSH_DATA (push, 0);
271 PUSH_DATA (push, 0);
272 PUSH_DATA (push, 0);
273 }
274 }
275 }
276
277 static bool
278 nvc0_compute_state_validate(struct nvc0_context *nvc0)
279 {
280 if (!nvc0_compute_validate_program(nvc0))
281 return false;
282 if (nvc0->dirty_cp & NVC0_NEW_CP_CONSTBUF)
283 nvc0_compute_validate_constbufs(nvc0);
284 if (nvc0->dirty_cp & NVC0_NEW_CP_DRIVERCONST)
285 nvc0_compute_validate_driverconst(nvc0);
286 if (nvc0->dirty_cp & NVC0_NEW_CP_BUFFERS)
287 nvc0_compute_validate_buffers(nvc0);
288 if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
289 nvc0_compute_validate_textures(nvc0);
290 if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
291 nvc0_compute_validate_samplers(nvc0);
292
293 /* TODO: surfaces, global memory buffers */
294
295 nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
296
297 nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
298 if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
299 return false;
300 if (unlikely(nvc0->state.flushed))
301 nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
302
303 return true;
304
305 }
306
307 static void
308 nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
309 {
310 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
311 struct nvc0_screen *screen = nvc0->screen;
312 struct nvc0_program *cp = nvc0->compprog;
313
314 if (cp->parm_size) {
315 BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
316 PUSH_DATA (push, align(cp->parm_size, 0x100));
317 PUSH_DATAh(push, screen->parm->offset);
318 PUSH_DATA (push, screen->parm->offset);
319 BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
320 PUSH_DATA (push, (0 << 8) | 1);
321 /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
322 BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4);
323 PUSH_DATA (push, 0);
324 PUSH_DATAp(push, input, cp->parm_size / 4);
325
326 BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
327 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
328 }
329 }
330
331 void
332 nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
333 {
334 struct nvc0_context *nvc0 = nvc0_context(pipe);
335 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
336 struct nvc0_program *cp = nvc0->compprog;
337 unsigned s;
338 int ret;
339
340 ret = !nvc0_compute_state_validate(nvc0);
341 if (ret) {
342 NOUVEAU_ERR("Failed to launch grid !\n");
343 return;
344 }
345
346 nvc0_compute_upload_input(nvc0, info->input);
347
348 BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
349 PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
350
351 BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
352 PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
353 PUSH_DATA (push, 0);
354 PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
355
356 BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
357 PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
358 PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
359 PUSH_DATA (push, cp->num_barriers);
360 BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
361 PUSH_DATA (push, cp->num_gprs);
362
363 /* grid/block setup */
364 BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
365 PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
366 PUSH_DATA (push, info->grid[2]);
367 BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
368 PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
369 PUSH_DATA (push, info->block[2]);
370
371 /* launch preliminary setup */
372 BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
373 PUSH_DATA (push, 0x1);
374 BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
375 PUSH_DATA (push, 0);
376 BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
377 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
378
379 /* kernel launching */
380 BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
381 PUSH_DATA (push, 0);
382 BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
383 PUSH_DATA (push, 0);
384 BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
385 PUSH_DATA (push, 0x1000);
386 BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
387 PUSH_DATA (push, 0);
388 BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
389 PUSH_DATA (push, 0x1);
390
391 /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
392 nvc0->dirty |= NVC0_NEW_CONSTBUF;
393 for (s = 0; s < 5; s++) {
394 nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
395 nvc0->state.uniform_buffer_bound[s] = 0;
396 }
397 }