/*
 * Copyright 2013 Nouveau Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Christoph Bumiller, Samuel Pitoiset
 */
#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_compute.xml.h"
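
/* Compute (GPGPU) support for the Fermi (NVC0) compute class: engine setup,
 * compute state validation, and grid launch. */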
int
nvc0_screen_compute_setup(struct nvc0_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_object *chan = screen->base.channel;
   struct nouveau_device *dev = screen->base.device;
   uint32_t obj_class;
   int ret;
   int i;
   switch (dev->chipset & ~0xf) {
   case 0xc0:
   case 0xd0:
      /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but, in
       * practice, an ILLEGAL_CLASS error shows up in dmesg when using it. */
      obj_class = NVC0_COMPUTE_CLASS;
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }
   ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
      return ret;
   }
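
   /* Bind the new object to the compute subchannel. The 0xbeef90c0 handle
    * above is an arbitrary client-chosen name (by nouveau convention, 0xbeef
    * followed by the class number). */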
   BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->oclass);

   BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);
   PUSH_DATA (push, screen->mp_count);
   BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);
   PUSH_DATA (push, 0xf);

   BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);
   PUSH_DATA (push, 0x8000);
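
   /* 0x02a0 and 0x02c4 have no symbolic names in the XML database. The
    * GLOBAL_BASE loop below appears to program an identity mapping for the
    * 256 global memory windows (window i -> segment i); the 0xc in the top
    * nibble is presumably a flags field. */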
   /* global memory setup */
   BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
   PUSH_DATA (push, 0);
   BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);
   for (i = 0; i <= 0xff; i++)
      PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
   BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
   PUSH_DATA (push, 1);
   /* local memory and cstack setup */
   BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->offset);
   BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);
   PUSH_DATAh(push, screen->tls->size);
   PUSH_DATA (push, screen->tls->size);
   BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);
   PUSH_DATA (push, 0xff << 24);
   /* shared memory setup */
   BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);
   PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
   BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);
   PUSH_DATA (push, 0xfe << 24);
   BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);
   PUSH_DATA (push, 0);
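
   /* The LOCAL_BASE / SHARED_BASE values above (0xff << 24, 0xfe << 24) are
    * addresses in the windowed address space, which presumably gives l[] and
    * s[] windows of their own, distinct from the global ones. */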
   /* code segment setup */
   BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->text->offset);
   PUSH_DATA (push, screen->text->offset);
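
   /* Texture headers (TIC) and sampler state (TSC) share the txc buffer;
    * the TSC table starts 64 KiB into it. */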
   BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
   BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);

   return 0;
}
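
/* Shader stages 0..4 are the 3D stages (VTX, TCS, TES, GEOM, FRAG); index 5
 * is COMPUTE. Texture, sampler, constbuf and surface state is shared between
 * the 3D and compute engines, so validating it for one side must dirty the
 * other, which is what the "aliased" comments below refer to. */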
static void
nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
{
   bool need_flush = nvc0_validate_tsc(nvc0, 5);
   if (need_flush) {
      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }

   /* Invalidate all 3D samplers because they are aliased. */
   for (int s = 0; s < 5; s++)
      nvc0->samplers_dirty[s] = ~0;
   nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
}
static void
nvc0_compute_validate_textures(struct nvc0_context *nvc0)
{
   bool need_flush = nvc0_validate_tic(nvc0, 5);
   if (need_flush) {
      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }

   /* Invalidate all 3D textures because they are aliased. */
   for (int s = 0; s < 5; s++) {
      for (int i = 0; i < nvc0->num_textures[s]; i++)
         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
      nvc0->textures_dirty[s] = ~0;
   }
   nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
}
static inline void
nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0)
{
   int s;

   /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
   for (s = 0; s < 5; s++) {
      nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
      nvc0->state.uniform_buffer_bound[s] = 0;
   }
   nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
}
static void
nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const int s = 5;

   while (nvc0->constbuf_dirty[s]) {
      int i = ffs(nvc0->constbuf_dirty[s]) - 1;
      nvc0->constbuf_dirty[s] &= ~(1 << i);
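
      /* Slot 0 with the "user" flag set holds user uniforms that have to be
       * copied into the driver's uniform_bo; any other slot is a real buffer
       * resource (UBO) that can be bound directly. */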
      if (nvc0->constbuf[s][i].user) {
         struct nouveau_bo *bo = nvc0->screen->uniform_bo;
         const unsigned base = NVC0_CB_USR_INFO(s);
         const unsigned size = nvc0->constbuf[s][0].size;
         assert(i == 0); /* we really only want OpenGL uniforms here */
         assert(nvc0->constbuf[s][0].u.data);

         if (nvc0->state.uniform_buffer_bound[s] < size) {
            nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);

            BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
            PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
            PUSH_DATAh(push, bo->offset + base);
            PUSH_DATA (push, bo->offset + base);
            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
            PUSH_DATA (push, (0 << 8) | 1);
         }
         nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                         base, nvc0->state.uniform_buffer_bound[s],
                         0, (size + 3) / 4,
                         nvc0->constbuf[s][0].u.data);
      } else {
         struct nv04_resource *res =
            nv04_resource(nvc0->constbuf[s][i].u.buf);
         if (res) {
            BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
            PUSH_DATA (push, nvc0->constbuf[s][i].size);
            PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
            PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
            PUSH_DATA (push, (i << 8) | 1);

            BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);

            res->cb_bindings[s] |= 1 << i;
         } else {
            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
            PUSH_DATA (push, (i << 8) | 0);
         }
         if (i == 0)
            nvc0->state.uniform_buffer_bound[s] = 0;
      }
   }

   nvc0_compute_invalidate_constbufs(nvc0);

   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
}
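
/* CB_BIND takes (binding index << 8) | valid. Binding 15 is reserved for the
 * driver's auxiliary constant buffer, which validate_driverconst and
 * validate_buffers below upload into. */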
static void
nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;

   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   PUSH_DATA (push, 2048);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
   PUSH_DATA (push, (15 << 8) | 1);

   nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST;
}
static void
nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   const int s = 5;
   int i;

   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   PUSH_DATA (push, 2048);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
   PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));

   for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
      if (nvc0->buffers[s][i].buffer) {
         struct nv04_resource *res =
            nv04_resource(nvc0->buffers[s][i].buffer);
         PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
         PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
         PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
         PUSH_DATA (push, 0);
         BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
      } else {
         /* keep the packet's word count consistent: 4 words per buffer */
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
      }
   }
}
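
/* OpenCL-style global buffers only need to be made resident for the kernel;
 * no binding state is emitted here. */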
static void
nvc0_compute_validate_globals(struct nvc0_context *nvc0)
{
   unsigned i;

   for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
        ++i) {
      struct pipe_resource *res = *util_dynarray_element(
         &nvc0->global_residents, struct pipe_resource *, i);
      if (res)
         nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL,
                           nv04_resource(res), NOUVEAU_BO_RDWR);
   }
}
static void
nvc0_compute_invalidate_surfaces(struct nvc0_context *nvc0, const int s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   int i;

   for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
      if (s == 5)
         BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
      else
         BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0x14000);
      PUSH_DATA(push, 0);
   }
}
static void
nvc0_compute_validate_surfaces(struct nvc0_context *nvc0)
{
   /* TODO: Invalidating both 3D and CP surfaces before validating surfaces
    * for compute is probably not strictly necessary, but no better solution
    * has been found so far. This fixes some invalidation issues when compute
    * and fragment shaders are used inside the same context. In any case,
    * there are definitely invalidation issues between 3D and CP for other
    * resources like SSBOs and atomic counters. */
   nvc0_compute_invalidate_surfaces(nvc0, 4);
   nvc0_compute_invalidate_surfaces(nvc0, 5);

   nvc0_validate_suf(nvc0, 5);

   /* Invalidate all FRAGMENT images because they are aliased with COMPUTE. */
   nvc0->dirty_3d |= NVC0_NEW_3D_SURFACES;
   nvc0->images_dirty[4] |= nvc0->images_valid[4];
}
static struct nvc0_state_validate
validate_list_cp[] = {
   { nvc0_compprog_validate,            NVC0_NEW_CP_PROGRAM     },
   { nvc0_compute_validate_constbufs,   NVC0_NEW_CP_CONSTBUF    },
   { nvc0_compute_validate_driverconst, NVC0_NEW_CP_DRIVERCONST },
   { nvc0_compute_validate_buffers,     NVC0_NEW_CP_BUFFERS     },
   { nvc0_compute_validate_textures,    NVC0_NEW_CP_TEXTURES    },
   { nvc0_compute_validate_samplers,    NVC0_NEW_CP_SAMPLERS    },
   { nvc0_compute_validate_globals,     NVC0_NEW_CP_GLOBALS     },
   { nvc0_compute_validate_surfaces,    NVC0_NEW_CP_SURFACES    },
};
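
/* nvc0_state_validate() walks this table and invokes each entry whose state
 * bits intersect the current dirty mask, then clears those bits. */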
static bool
nvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
{
   bool ret;

   ret = nvc0_state_validate(nvc0, mask, validate_list_cp,
                             ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp,
                             nvc0->bufctx_cp);

   if (unlikely(nvc0->state.flushed))
      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
   return ret;
}
static void
nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct nvc0_program *cp = nvc0->compprog;

   if (cp->parm_size) {
      struct nouveau_bo *bo = screen->uniform_bo;
      const unsigned base = NVC0_CB_USR_INFO(5);

      BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
      PUSH_DATA (push, align(cp->parm_size, 0x100));
      PUSH_DATAh(push, bo->offset + base);
      PUSH_DATA (push, bo->offset + base);
      BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
      PUSH_DATA (push, (0 << 8) | 1);
      /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
      BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
      PUSH_DATA (push, 0);
      PUSH_DATAp(push, input, cp->parm_size / 4);

      nvc0_compute_invalidate_constbufs(nvc0);

      BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
      PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
   }
}
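
/* Launch a compute grid: validate compute state, upload the kernel's input
 * parameters, program the launch descriptor (block/grid dimensions, GPRs,
 * shared/local memory), and fire LAUNCH. */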
void
nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *cp = nvc0->compprog;
   int ret;

   ret = !nvc0_state_validate_cp(nvc0, ~0);
   if (ret) {
      NOUVEAU_ERR("Failed to launch grid!\n");
      return;
   }
   nvc0_compute_upload_input(nvc0, info->input);

   BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
   PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
   BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
   PUSH_DATA (push, (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10));
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
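
   /* cp->hdr[1] appears to carry the per-thread positive l[] size from the
    * shader header, to which the driver-side lmem allocation (e.g. register
    * spills) is added. */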
   BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);
   PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
   PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
   PUSH_DATA (push, cp->num_barriers);
   BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);
   PUSH_DATA (push, cp->num_gprs);
   /* launch preliminary setup */
   BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);
   PUSH_DATA (push, 0x1);
   BEGIN_NVC0(push, SUBC_CP(0x036c), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
   BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);
   PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
   PUSH_DATA (push, info->block[2]);
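
   /* For indirect launches, the three GRIDDIM words are read from the
    * indirect buffer by a pushbuf macro, so the CPU never has to map it. */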
   if (unlikely(info->indirect)) {
      struct nv04_resource *res = nv04_resource(info->indirect);
      uint32_t offset = res->offset + info->indirect_offset;
      unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;

      nouveau_pushbuf_space(push, 16, 0, 1);
      PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
      PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
      nouveau_pushbuf_data(push, res->bo, offset,
                           NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
   } else {
      BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);
      PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
      PUSH_DATA (push, info->grid[2]);
   }
   /* kernel launching */
   BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);
   PUSH_DATA (push, 0x1000);
   BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, SUBC_CP(0x0360), 1);
   PUSH_DATA (push, 0x1);
   /* TODO: Not sure if this is really necessary. */
   nvc0_compute_invalidate_surfaces(nvc0, 5);
}