2 * Copyright 2013 Nouveau Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Christoph Bumiller, Samuel Pitoiset
25 #include "nvc0/nvc0_context.h"
26 #include "nvc0/nvc0_compute.h"
/* One-time initialization of the COMPUTE engine for a screen: picks the
 * compute class from the chipset, allocates the compute object and a small
 * (4 KiB) VRAM buffer, then emits the static compute state (MP limits,
 * global/local/shared memory windows, code segment, TIC/TSC tables) into
 * the given pushbuf.
 *
 * NOTE(review): this chunk is a lossy extraction -- the return type,
 * braces, switch-case labels, declarations of ret/obj_class/i, and the
 * return statements are missing between the numbered fragments below.
 * Only comments were added here; restore the missing lines from the
 * upstream file before compiling. */
29 nvc0_screen_compute_setup(struct nvc0_screen
*screen
,
30 struct nouveau_pushbuf
*push
)
32 struct nouveau_object
*chan
= screen
->base
.channel
;
33 struct nouveau_device
*dev
= screen
->base
.device
;
/* Select the hardware class by chipset family (low nibble is the
 * per-family revision, masked off). */
38 switch (dev
->chipset
& ~0xf) {
41 /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,
42 * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */
43 obj_class
= NVC0_COMPUTE_CLASS
;
46 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev
->chipset
);
/* Allocate the compute object on the channel; 0xbeef90c0 is the
 * driver-chosen handle for it. */
50 ret
= nouveau_object_new(chan
, 0xbeef90c0, obj_class
, NULL
, 0,
53 NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret
);
/* One page of VRAM -- presumably the parameter buffer; confirm against
 * the upstream declaration this assigns to. */
57 ret
= nouveau_bo_new(dev
, NV_VRAM_DOMAIN(&screen
->base
), 0, 1 << 12, NULL
,
/* Bind the compute class to its subchannel. */
62 BEGIN_NVC0(push
, SUBC_CP(NV01_SUBCHAN_OBJECT
), 1);
63 PUSH_DATA (push
, screen
->compute
->oclass
);
/* Basic execution limits: number of MPs and the call-stack depth log2. */
66 BEGIN_NVC0(push
, NVC0_CP(MP_LIMIT
), 1);
67 PUSH_DATA (push
, screen
->mp_count
);
68 BEGIN_NVC0(push
, NVC0_CP(CALL_LIMIT_LOG
), 1);
69 PUSH_DATA (push
, 0xf);
/* Unknown method 0x02a0 -- value taken as-is; semantics undocumented. */
71 BEGIN_NVC0(push
, SUBC_CP(0x02a0), 1);
72 PUSH_DATA (push
, 0x8000);
74 /* global memory setup */
75 BEGIN_NVC0(push
, SUBC_CP(0x02c4), 1);
/* Program all 256 GLOBAL_BASE slots; each entry encodes its own index. */
77 BEGIN_NIC0(push
, NVC0_CP(GLOBAL_BASE
), 0x100);
78 for (i
= 0; i
<= 0xff; i
++)
79 PUSH_DATA (push
, (0xc << 28) | (i
<< 16) | i
);
80 BEGIN_NVC0(push
, SUBC_CP(0x02c4), 1);
83 /* local memory and cstack setup */
84 BEGIN_NVC0(push
, NVC0_CP(TEMP_ADDRESS_HIGH
), 2);
85 PUSH_DATAh(push
, screen
->tls
->offset
);
86 PUSH_DATA (push
, screen
->tls
->offset
);
87 BEGIN_NVC0(push
, NVC0_CP(TEMP_SIZE_HIGH
), 2);
88 PUSH_DATAh(push
, screen
->tls
->size
);
89 PUSH_DATA (push
, screen
->tls
->size
);
90 BEGIN_NVC0(push
, NVC0_CP(WARP_TEMP_ALLOC
), 1);
/* l[] addresses are sign-extended 24-bit; base at 0xff << 24. */
92 BEGIN_NVC0(push
, NVC0_CP(LOCAL_BASE
), 1);
93 PUSH_DATA (push
, 0xff << 24);
95 /* shared memory setup */
96 BEGIN_NVC0(push
, NVC0_CP(CACHE_SPLIT
), 1);
97 PUSH_DATA (push
, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1
);
98 BEGIN_NVC0(push
, NVC0_CP(SHARED_BASE
), 1);
99 PUSH_DATA (push
, 0xfe << 24);
100 BEGIN_NVC0(push
, NVC0_CP(SHARED_SIZE
), 1);
103 /* code segment setup */
104 BEGIN_NVC0(push
, NVC0_CP(CODE_ADDRESS_HIGH
), 2);
105 PUSH_DATAh(push
, screen
->text
->offset
);
106 PUSH_DATA (push
, screen
->text
->offset
);
/* Texture image control (TIC) table address and entry count. */
109 BEGIN_NVC0(push
, NVC0_CP(TIC_ADDRESS_HIGH
), 3);
110 PUSH_DATAh(push
, screen
->txc
->offset
);
111 PUSH_DATA (push
, screen
->txc
->offset
);
112 PUSH_DATA (push
, NVC0_TIC_MAX_ENTRIES
- 1);
/* Texture sampler control (TSC) table lives 64 KiB after the TIC table
 * in the same txc buffer. */
115 BEGIN_NVC0(push
, NVC0_CP(TSC_ADDRESS_HIGH
), 3);
116 PUSH_DATAh(push
, screen
->txc
->offset
+ 65536);
117 PUSH_DATA (push
, screen
->txc
->offset
+ 65536);
118 PUSH_DATA (push
, NVC0_TSC_MAX_ENTRIES
- 1);
/* Ensure the bound compute program is translated and uploaded. Translates
 * the TGSI/IR on first use, uploads the machine code, and flushes the
 * compute code cache after a (re)upload.
 *
 * NOTE(review): lossy extraction -- the return type, early-return bodies
 * and closing braces are missing between the numbered fragments. Only
 * comments were added. */
124 nvc0_compute_validate_program(struct nvc0_context
*nvc0
)
126 struct nvc0_program
*prog
= nvc0
->compprog
;
/* Lazily translate the program for this chipset on first validation. */
131 if (!prog
->translated
) {
132 prog
->translated
= nvc0_program_translate(
133 prog
, nvc0
->screen
->base
.device
->chipset
, &nvc0
->base
.debug
);
134 if (!prog
->translated
)
137 if (unlikely(!prog
->code_size
))
140 if (likely(prog
->code_size
)) {
/* If the upload (re)located code, the instruction cache must be
 * invalidated with a FLUSH_CODE. */
141 if (nvc0_program_upload_code(nvc0
, prog
)) {
142 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
143 BEGIN_NVC0(push
, NVC0_CP(FLUSH
), 1);
144 PUSH_DATA (push
, NVC0_COMPUTE_FLUSH_CODE
);
/* Validate compute-stage (stage index 5) sampler state via the shared
 * TSC validation helper, and flush the TSC cache when entries changed.
 *
 * NOTE(review): the `if (need_flush)` guard line from the original is
 * missing in this extraction; only comments were added here. */
152 nvc0_compute_validate_samplers(struct nvc0_context
*nvc0
)
154 bool need_flush
= nvc0_validate_tsc(nvc0
, 5);
156 BEGIN_NVC0(nvc0
->base
.pushbuf
, NVC0_CP(TSC_FLUSH
), 1);
157 PUSH_DATA (nvc0
->base
.pushbuf
, 0);
/* Validate compute-stage (stage index 5) texture state via the shared
 * TIC validation helper, and flush the TIC cache when entries changed.
 *
 * NOTE(review): the `if (need_flush)` guard line from the original is
 * missing in this extraction; only comments were added here. */
162 nvc0_compute_validate_textures(struct nvc0_context
*nvc0
)
164 bool need_flush
= nvc0_validate_tic(nvc0
, 5);
166 BEGIN_NVC0(nvc0
->base
.pushbuf
, NVC0_CP(TIC_FLUSH
), 1);
167 PUSH_DATA (nvc0
->base
.pushbuf
, 0);
/* (Re)bind the dirty constant buffers of the compute stage. User
 * uniforms (slot 0) are streamed into the screen's uniform_bo window;
 * real buffer resources are bound by GPU address; cleared slots are
 * unbound. Ends with a constbuf cache flush.
 *
 * NOTE(review): lossy extraction -- the stage-loop / declarations of `s`,
 * the else-branch keywords and closing braces are missing between the
 * numbered fragments. Only comments were added. */
172 nvc0_compute_validate_constbufs(struct nvc0_context
*nvc0
)
174 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
/* Process each dirty slot, lowest set bit first. */
177 while (nvc0
->constbuf_dirty
[s
]) {
178 int i
= ffs(nvc0
->constbuf_dirty
[s
]) - 1;
179 nvc0
->constbuf_dirty
[s
] &= ~(1 << i
);
/* User (client-memory) constbuf: upload through uniform_bo. */
181 if (nvc0
->constbuf
[s
][i
].user
) {
182 struct nouveau_bo
*bo
= nvc0
->screen
->uniform_bo
;
183 const unsigned base
= s
<< 16;
184 const unsigned size
= nvc0
->constbuf
[s
][0].size
;
185 assert(i
== 0); /* we really only want OpenGL uniforms here */
186 assert(nvc0
->constbuf
[s
][0].u
.data
);
/* Grow the bound window (0x100-aligned) before rebinding. */
188 if (nvc0
->state
.uniform_buffer_bound
[s
] < size
) {
189 nvc0
->state
.uniform_buffer_bound
[s
] = align(size
, 0x100);
191 BEGIN_NVC0(push
, NVC0_CP(CB_SIZE
), 3);
192 PUSH_DATA (push
, nvc0
->state
.uniform_buffer_bound
[s
]);
193 PUSH_DATAh(push
, bo
->offset
+ base
);
194 PUSH_DATA (push
, bo
->offset
+ base
);
195 BEGIN_NVC0(push
, NVC0_CP(CB_BIND
), 1);
196 PUSH_DATA (push
, (0 << 8) | 1);
/* Stream the uniform data into the window. */
198 nvc0_cb_bo_push(&nvc0
->base
, bo
, NV_VRAM_DOMAIN(&nvc0
->screen
->base
),
199 base
, nvc0
->state
.uniform_buffer_bound
[s
],
201 nvc0
->constbuf
[s
][0].u
.data
);
/* Resource-backed constbuf: bind its GPU address directly. */
203 struct nv04_resource
*res
=
204 nv04_resource(nvc0
->constbuf
[s
][i
].u
.buf
);
206 BEGIN_NVC0(push
, NVC0_CP(CB_SIZE
), 3);
207 PUSH_DATA (push
, nvc0
->constbuf
[s
][i
].size
);
208 PUSH_DATAh(push
, res
->address
+ nvc0
->constbuf
[s
][i
].offset
);
209 PUSH_DATA (push
, res
->address
+ nvc0
->constbuf
[s
][i
].offset
);
210 BEGIN_NVC0(push
, NVC0_CP(CB_BIND
), 1);
211 PUSH_DATA (push
, (i
<< 8) | 1);
/* Keep the buffer resident for the compute bufctx (read-only). */
213 BCTX_REFN(nvc0
->bufctx_cp
, CP_CB(i
), res
, RD
);
215 res
->cb_bindings
[s
] |= 1 << i
;
/* Slot has no buffer: unbind it (bit 0 clear). */
217 BEGIN_NVC0(push
, NVC0_CP(CB_BIND
), 1);
218 PUSH_DATA (push
, (i
<< 8) | 0);
221 nvc0
->state
.uniform_buffer_bound
[s
] = 0;
/* Make the new constbuf contents visible to the compute engine. */
225 BEGIN_NVC0(push
, NVC0_CP(FLUSH
), 1);
226 PUSH_DATA (push
, NVC0_COMPUTE_FLUSH_CB
);
/* Bind the driver-internal constant buffer (cb slot 15, 1 KiB) for the
 * compute stage; it lives in uniform_bo at the (6 << 16) + (5 << 10)
 * window. Because the window is shared with 3D, the 3D driverconst
 * state is marked dirty so it gets rebound later.
 *
 * NOTE(review): lossy extraction -- return type and braces are missing;
 * only comments were added. */
230 nvc0_compute_validate_driverconst(struct nvc0_context
*nvc0
)
232 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
233 struct nvc0_screen
*screen
= nvc0
->screen
;
235 BEGIN_NVC0(push
, NVC0_CP(CB_SIZE
), 3);
236 PUSH_DATA (push
, 1024);
237 PUSH_DATAh(push
, screen
->uniform_bo
->offset
+ (6 << 16) + (5 << 10));
238 PUSH_DATA (push
, screen
->uniform_bo
->offset
+ (6 << 16) + (5 << 10));
239 BEGIN_NVC0(push
, NVC0_CP(CB_BIND
), 1);
240 PUSH_DATA (push
, (15 << 8) | 1);
242 nvc0
->dirty_3d
|= NVC0_NEW_3D_DRIVERCONST
;
/* Upload shader buffer (SSBO) descriptors for the compute stage into the
 * driver constbuf window: for each of the NVC0_MAX_BUFFERS slots, write
 * {address lo, address hi, size, ...} starting at offset 512, and keep
 * bound buffers resident read/write.
 *
 * NOTE(review): lossy extraction -- declarations of `s`/`i`, the empty-slot
 * else-branch and closing braces are missing between the numbered
 * fragments. Only comments were added. */
246 nvc0_compute_validate_buffers(struct nvc0_context
*nvc0
)
248 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
/* Point CB uploads at this stage's 1 KiB driver constbuf window. */
252 BEGIN_NVC0(push
, NVC0_CP(CB_SIZE
), 3);
253 PUSH_DATA (push
, 1024);
254 PUSH_DATAh(push
, nvc0
->screen
->uniform_bo
->offset
+ (6 << 16) + (s
<< 10));
255 PUSH_DATA (push
, nvc0
->screen
->uniform_bo
->offset
+ (6 << 16) + (s
<< 10));
/* 4 words per buffer slot, written at offset 512 within the window. */
256 BEGIN_1IC0(push
, NVC0_CP(CB_POS
), 1 + 4 * NVC0_MAX_BUFFERS
);
257 PUSH_DATA (push
, 512);
259 for (i
= 0; i
< NVC0_MAX_BUFFERS
; i
++) {
260 if (nvc0
->buffers
[s
][i
].buffer
) {
261 struct nv04_resource
*res
=
262 nv04_resource(nvc0
->buffers
[s
][i
].buffer
);
263 PUSH_DATA (push
, res
->address
+ nvc0
->buffers
[s
][i
].buffer_offset
);
264 PUSH_DATAh(push
, res
->address
+ nvc0
->buffers
[s
][i
].buffer_offset
);
265 PUSH_DATA (push
, nvc0
->buffers
[s
][i
].buffer_size
);
/* Shader may read and write the buffer -> RDWR residency. */
267 BCTX_REFN(nvc0
->bufctx_cp
, CP_BUF
, res
, RDWR
);
/* Re-add every resource in the global-residents dynarray to the compute
 * bufctx with read/write access, so globally-accessible buffers stay
 * resident across compute launches.
 *
 * NOTE(review): lossy extraction -- the loop increment/body braces and a
 * NULL check on `res` are missing between the numbered fragments. Only
 * comments were added. */
278 nvc0_compute_validate_globals(struct nvc0_context
*nvc0
)
282 for (i
= 0; i
< nvc0
->global_residents
.size
/ sizeof(struct pipe_resource
*);
284 struct pipe_resource
*res
= *util_dynarray_element(
285 &nvc0
->global_residents
, struct pipe_resource
*, i
);
287 nvc0_add_resident(nvc0
->bufctx_cp
, NVC0_BIND_CP_GLOBAL
,
288 nv04_resource(res
), NOUVEAU_BO_RDWR
);
/* Top-level compute state validation: revalidate the program, then each
 * piece of state flagged in dirty_cp, attach the compute bufctx to the
 * pushbuf and validate it. Re-fences the bufctx if validation caused a
 * flush.
 *
 * NOTE(review): lossy extraction -- the return type, `return` statements
 * after the early-exit conditions and the closing brace are missing.
 * Only comments were added. */
293 nvc0_compute_state_validate(struct nvc0_context
*nvc0
)
295 if (!nvc0_compute_validate_program(nvc0
))
297 if (nvc0
->dirty_cp
& NVC0_NEW_CP_CONSTBUF
)
298 nvc0_compute_validate_constbufs(nvc0
);
299 if (nvc0
->dirty_cp
& NVC0_NEW_CP_DRIVERCONST
)
300 nvc0_compute_validate_driverconst(nvc0
);
301 if (nvc0
->dirty_cp
& NVC0_NEW_CP_BUFFERS
)
302 nvc0_compute_validate_buffers(nvc0
);
303 if (nvc0
->dirty_cp
& NVC0_NEW_CP_TEXTURES
)
304 nvc0_compute_validate_textures(nvc0
);
305 if (nvc0
->dirty_cp
& NVC0_NEW_CP_SAMPLERS
)
306 nvc0_compute_validate_samplers(nvc0
);
308 /* TODO: surfaces, global memory buffers */
310 nvc0_bufctx_fence(nvc0
, nvc0
->bufctx_cp
, false);
312 nouveau_pushbuf_bufctx(nvc0
->base
.pushbuf
, nvc0
->bufctx_cp
);
313 if (unlikely(nouveau_pushbuf_validate(nvc0
->base
.pushbuf
)))
/* Pushbuf validation may have flushed; re-fence with on_flush=true. */
315 if (unlikely(nvc0
->state
.flushed
))
316 nvc0_bufctx_fence(nvc0
, nvc0
->bufctx_cp
, true);
/* Upload the kernel input (launch parameters) into the screen's parm
 * buffer, bound as cb slot 0 of the compute stage, then flush the
 * constbuf cache so the engine sees the data.
 *
 * NOTE(review): lossy extraction -- braces and an `if (input)` guard
 * around the data upload appear to be missing; only comments were
 * added. */
323 nvc0_compute_upload_input(struct nvc0_context
*nvc0
, const void *input
)
325 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
326 struct nvc0_screen
*screen
= nvc0
->screen
;
327 struct nvc0_program
*cp
= nvc0
->compprog
;
/* Bind the parm buffer (0x100-aligned size) as constbuf slot 0. */
330 BEGIN_NVC0(push
, NVC0_CP(CB_SIZE
), 3);
331 PUSH_DATA (push
, align(cp
->parm_size
, 0x100));
332 PUSH_DATAh(push
, screen
->parm
->offset
);
333 PUSH_DATA (push
, screen
->parm
->offset
);
334 BEGIN_NVC0(push
, NVC0_CP(CB_BIND
), 1);
335 PUSH_DATA (push
, (0 << 8) | 1);
336 /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
337 BEGIN_1IC0(push
, NVC0_CP(CB_POS
), 1 + cp
->parm_size
/ 4);
339 PUSH_DATAp(push
, input
, cp
->parm_size
/ 4);
341 BEGIN_NVC0(push
, NVC0_CP(FLUSH
), 1);
342 PUSH_DATA (push
, NVC0_COMPUTE_FLUSH_CB
);
347 nvc0_launch_grid(struct pipe_context
*pipe
, const struct pipe_grid_info
*info
)
349 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
350 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
351 struct nvc0_program
*cp
= nvc0
->compprog
;
355 ret
= !nvc0_compute_state_validate(nvc0
);
357 NOUVEAU_ERR("Failed to launch grid !\n");
361 nvc0_compute_upload_input(nvc0
, info
->input
);
363 BEGIN_NVC0(push
, NVC0_CP(CP_START_ID
), 1);
364 PUSH_DATA (push
, nvc0_program_symbol_offset(cp
, info
->pc
));
366 BEGIN_NVC0(push
, NVC0_CP(LOCAL_POS_ALLOC
), 3);
367 PUSH_DATA (push
, align(cp
->cp
.lmem_size
, 0x10));
369 PUSH_DATA (push
, 0x800); /* WARP_CSTACK_SIZE */
371 BEGIN_NVC0(push
, NVC0_CP(SHARED_SIZE
), 3);
372 PUSH_DATA (push
, align(cp
->cp
.smem_size
, 0x100));
373 PUSH_DATA (push
, info
->block
[0] * info
->block
[1] * info
->block
[2]);
374 PUSH_DATA (push
, cp
->num_barriers
);
375 BEGIN_NVC0(push
, NVC0_CP(CP_GPR_ALLOC
), 1);
376 PUSH_DATA (push
, cp
->num_gprs
);
378 /* launch preliminary setup */
379 BEGIN_NVC0(push
, NVC0_CP(GRIDID
), 1);
380 PUSH_DATA (push
, 0x1);
381 BEGIN_NVC0(push
, SUBC_CP(0x036c), 1);
383 BEGIN_NVC0(push
, NVC0_CP(FLUSH
), 1);
384 PUSH_DATA (push
, NVC0_COMPUTE_FLUSH_GLOBAL
| NVC0_COMPUTE_FLUSH_UNK8
);
387 BEGIN_NVC0(push
, NVC0_CP(BLOCKDIM_YX
), 2);
388 PUSH_DATA (push
, (info
->block
[1] << 16) | info
->block
[0]);
389 PUSH_DATA (push
, info
->block
[2]);
391 if (unlikely(info
->indirect
)) {
392 struct nv04_resource
*res
= nv04_resource(info
->indirect
);
393 uint32_t offset
= res
->offset
+ info
->indirect_offset
;
394 unsigned macro
= NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT
;
396 nouveau_pushbuf_space(push
, 16, 0, 1);
397 PUSH_REFN(push
, res
->bo
, NOUVEAU_BO_RD
| res
->domain
);
398 PUSH_DATA(push
, NVC0_FIFO_PKHDR_1I(1, macro
, 3));
399 nouveau_pushbuf_data(push
, res
->bo
, offset
,
400 NVC0_IB_ENTRY_1_NO_PREFETCH
| 3 * 4);
403 BEGIN_NVC0(push
, NVC0_CP(GRIDDIM_YX
), 2);
404 PUSH_DATA (push
, (info
->grid
[1] << 16) | info
->grid
[0]);
405 PUSH_DATA (push
, info
->grid
[2]);
407 /* kernel launching */
408 BEGIN_NVC0(push
, NVC0_CP(COMPUTE_BEGIN
), 1);
410 BEGIN_NVC0(push
, SUBC_CP(0x0a08), 1);
412 BEGIN_NVC0(push
, NVC0_CP(LAUNCH
), 1);
413 PUSH_DATA (push
, 0x1000);
414 BEGIN_NVC0(push
, NVC0_CP(COMPUTE_END
), 1);
416 BEGIN_NVC0(push
, SUBC_CP(0x0360), 1);
417 PUSH_DATA (push
, 0x1);
420 /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
421 nvc0
->dirty_3d
|= NVC0_NEW_3D_CONSTBUF
;
422 for (s
= 0; s
< 5; s
++) {
423 nvc0
->constbuf_dirty
[s
] |= nvc0
->constbuf_valid
[s
];
424 nvc0
->state
.uniform_buffer_bound
[s
] = 0;