2 * Copyright 2013 Nouveau Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Christoph Bumiller, Samuel Pitoiset
25 #include "nvc0/nvc0_context.h"
26 #include "nvc0/nvc0_compute.h"
/*
 * Create the compute engine object for a screen and emit its initial state
 * into `push`.
 *
 * Visible steps: pick an object class from dev->chipset, create the object
 * with nouveau_object_new(), allocate a (1 << 12)-byte VRAM buffer with
 * nouveau_bo_new(), then push initial values for the hardware limits, the
 * global/local/shared memory windows, the code segment address and the
 * parameter constant buffer.
 *
 * NOTE(review): this listing looks lossy. The embedded line numbers skip,
 * and braces, local declarations, case labels, return statements and some
 * call operands are not visible. The comments here describe only what is
 * shown; confirm details against the complete file.
 */
29 nvc0_screen_compute_setup(struct nvc0_screen
*screen
,
30 struct nouveau_pushbuf
*push
)
32 struct nouveau_object
*chan
= screen
->base
.channel
;
33 struct nouveau_device
*dev
= screen
->base
.device
;
/* The switch keys on the high nibble of the chipset id. */
38 switch (dev
->chipset
& 0xf0) {
/* Chipset 0xc8 is the only one shown that selects NVC8_COMPUTE_CLASS. */
40 if (dev
->chipset
== 0xc8)
41 obj_class
= NVC8_COMPUTE_CLASS
;
43 obj_class
= NVC0_COMPUTE_CLASS
;
46 obj_class
= NVC0_COMPUTE_CLASS
;
/* Presumably the default case (its label is not visible here -- confirm). */
49 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev
->chipset
);
/* Create the compute object on the screen's channel, handle 0xbeef90c0. */
53 ret
= nouveau_object_new(chan
, 0xbeef90c0, obj_class
, NULL
, 0,
56 NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret
);
/*
 * Allocate a (1 << 12)-byte buffer in VRAM. The destination argument is not
 * visible here; presumably it is screen->parm, which is bound as a constant
 * buffer further down -- confirm.
 */
60 ret
= nouveau_bo_new(dev
, NOUVEAU_BO_VRAM
, 0, 1 << 12, NULL
,
/* Send the compute object's class through the NV01_SUBCHAN_OBJECT method. */
65 BEGIN_NVC0(push
, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT
), 1);
66 PUSH_DATA (push
, screen
->compute
->oclass
);
/* MP_LIMIT takes the screen's mp_count; CALL_LIMIT_LOG is set to 0xf. */
69 BEGIN_NVC0(push
, NVC0_COMPUTE(MP_LIMIT
), 1);
70 PUSH_DATA (push
, screen
->mp_count
);
71 BEGIN_NVC0(push
, NVC0_COMPUTE(CALL_LIMIT_LOG
), 1);
72 PUSH_DATA (push
, 0xf);
74 BEGIN_NVC0(push
, SUBC_COMPUTE(0x02a0), 1);
75 PUSH_DATA (push
, 0x8000);
77 /* global memory setup */
78 BEGIN_NVC0(push
, SUBC_COMPUTE(0x02c4), 1);
/*
 * 0x100 words are announced for GLOBAL_BASE and the loop emits one per i in
 * 0..0xff, each word carrying 0xc in the top nibble and i at bit 16 and at
 * bit 0.
 */
80 BEGIN_NIC0(push
, NVC0_COMPUTE(GLOBAL_BASE
), 0x100);
81 for (i
= 0; i
<= 0xff; i
++)
82 PUSH_DATA (push
, (0xc << 28) | (i
<< 16) | i
);
83 BEGIN_NVC0(push
, SUBC_COMPUTE(0x02c4), 1);
86 /* local memory and cstack setup */
/*
 * Address and size both come from the screen's tls buffer, each sent as two
 * words (PUSH_DATAh presumably emits the upper half -- confirm).
 */
87 BEGIN_NVC0(push
, NVC0_COMPUTE(TEMP_ADDRESS_HIGH
), 2);
88 PUSH_DATAh(push
, screen
->tls
->offset
);
89 PUSH_DATA (push
, screen
->tls
->offset
);
90 BEGIN_NVC0(push
, NVC0_COMPUTE(TEMP_SIZE_HIGH
), 2);
91 PUSH_DATAh(push
, screen
->tls
->size
);
92 PUSH_DATA (push
, screen
->tls
->size
);
93 BEGIN_NVC0(push
, NVC0_COMPUTE(WARP_TEMP_ALLOC
), 1);
95 BEGIN_NVC0(push
, NVC0_COMPUTE(LOCAL_BASE
), 1);
96 PUSH_DATA (push
, 1 << 24);
98 /* shared memory setup */
99 BEGIN_NVC0(push
, NVC0_COMPUTE(CACHE_SPLIT
), 1);
100 PUSH_DATA (push
, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1
);
101 BEGIN_NVC0(push
, NVC0_COMPUTE(SHARED_BASE
), 1);
102 PUSH_DATA (push
, 2 << 24);
103 BEGIN_NVC0(push
, NVC0_COMPUTE(SHARED_SIZE
), 1);
106 /* code segment setup */
/* The code address is the offset of the screen's text buffer. */
107 BEGIN_NVC0(push
, NVC0_COMPUTE(CODE_ADDRESS_HIGH
), 2);
108 PUSH_DATAh(push
, screen
->text
->offset
);
109 PUSH_DATA (push
, screen
->text
->offset
);
111 /* bind parameters buffer */
/* Three words starting at CB_SIZE: the size, then the address as two words. */
112 BEGIN_NVC0(push
, NVC0_COMPUTE(CB_SIZE
), 3);
113 PUSH_DATA (push
, screen
->parm
->size
);
114 PUSH_DATAh(push
, screen
->parm
->offset
);
115 PUSH_DATA (push
, screen
->parm
->offset
);
116 BEGIN_NVC0(push
, NVC0_COMPUTE(CB_BIND
), 1);
117 PUSH_DATA (push
, (0 << 8) | 1);
119 /* TODO: textures & samplers */
/*
 * Make sure the current compute program (nvc0->compprog) has been translated
 * and its code uploaded.
 *
 * Visible flow: if prog->translated is unset, call nvc0_program_translate()
 * with the device chipset and store the result; test prog->code_size; call
 * nvc0_program_upload_code(); when that call returns nonzero, emit a compute
 * FLUSH with NVC0_COMPUTE_FLUSH_CODE.
 *
 * NOTE(review): the return statements and closing braces are not visible in
 * this listing, so the result of each path cannot be stated here. The caller
 * in this file tests the result with `!`, i.e. it treats zero as failure.
 */
125 nvc0_compute_validate_program(struct nvc0_context
*nvc0
)
127 struct nvc0_program
*prog
= nvc0
->compprog
;
/* Translate on first use; the result is stored in prog->translated. */
132 if (!prog
->translated
) {
133 prog
->translated
= nvc0_program_translate(
134 prog
, nvc0
->screen
->base
.device
->chipset
);
135 if (!prog
->translated
)
/*
 * NOTE(review): code_size is tested twice in a row (unlikely(!...) here,
 * likely(...) next). If the first test returns, the second is redundant --
 * confirm against the complete file.
 */
138 if (unlikely(!prog
->code_size
))
141 if (likely(prog
->code_size
)) {
/* Only a nonzero upload result leads to the FLUSH_CODE being emitted. */
142 if (nvc0_program_upload_code(nvc0
, prog
)) {
143 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
144 BEGIN_NVC0(push
, NVC0_COMPUTE(FLUSH
), 1);
145 PUSH_DATA (push
, NVC0_COMPUTE_FLUSH_CODE
);
/*
 * Validate compute state ahead of a launch: first the compute program, then
 * the compute buffer context (nvc0->bufctx_cp) together with the pushbuf.
 *
 * NOTE(review): the statements guarded by the `if` conditions here (most
 * likely early returns) and the final return are not visible in this
 * listing; confirm the result convention against the complete file.
 */
153 nvc0_compute_state_validate(struct nvc0_context
*nvc0
)
/* A zero result from program validation takes the guarded path. */
155 if (!nvc0_compute_validate_program(nvc0
))
158 /* TODO: textures, samplers, surfaces, global memory buffers */
/* First fence call on bufctx_cp passes FALSE as the last argument. */
160 nvc0_bufctx_fence(nvc0
, nvc0
->bufctx_cp
, FALSE
);
/* Attach bufctx_cp to the pushbuf, then validate the pushbuf. */
162 nouveau_pushbuf_bufctx(nvc0
->base
.pushbuf
, nvc0
->bufctx_cp
);
163 if (unlikely(nouveau_pushbuf_validate(nvc0
->base
.pushbuf
)))
/* When state.flushed is set, bufctx_cp is fenced again, this time with TRUE. */
165 if (unlikely(nvc0
->state
.flushed
))
166 nvc0_bufctx_fence(nvc0
, nvc0
->bufctx_cp
, TRUE
);
/*
 * Push the kernel input block for the current compute program through the
 * pushbuf.
 *
 * Visible steps: program CB_SIZE with align(cp->parm_size, 0x100) and the
 * address screen->parm->offset, write CB_BIND, stream cp->parm_size / 4
 * words from `input` starting at CB_POS, then emit FLUSH with
 * NVC0_COMPUTE_FLUSH_CB.
 *
 * nvc0:  context supplying the pushbuf, the screen and the compute program.
 * input: source of the words handed to PUSH_DATAp.
 */
173 nvc0_compute_upload_input(struct nvc0_context
*nvc0
, const void *input
)
175 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
176 struct nvc0_screen
*screen
= nvc0
->screen
;
177 struct nvc0_program
*cp
= nvc0
->compprog
;
/* Size is the program's parm_size passed through align(..., 0x100). */
180 BEGIN_NVC0(push
, NVC0_COMPUTE(CB_SIZE
), 3);
181 PUSH_DATA (push
, align(cp
->parm_size
, 0x100));
182 PUSH_DATAh(push
, screen
->parm
->offset
);
183 PUSH_DATA (push
, screen
->parm
->offset
);
184 BEGIN_NVC0(push
, NVC0_COMPUTE(CB_BIND
), 1);
185 PUSH_DATA (push
, (0 << 8) | 1);
186 /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
/*
 * The packet length is 1 + parm_size / 4: the data words pushed below plus
 * one more word whose push is not visible in this listing (presumably the
 * CB_POS value itself -- confirm).
 * Assumes parm_size is a multiple of 4; otherwise the division would leave
 * trailing bytes unsent -- TODO confirm.
 */
187 BEGIN_1IC0(push
, NVC0_COMPUTE(CB_POS
), 1 + cp
->parm_size
/ 4);
189 PUSH_DATAp(push
, input
, cp
->parm_size
/ 4);
191 BEGIN_NVC0(push
, NVC0_COMPUTE(FLUSH
), 1);
192 PUSH_DATA (push
, NVC0_COMPUTE_FLUSH_CB
);
/*
 * Launch a compute grid on a pipe context.
 *
 * Visible steps: validate compute state, upload the input block, program the
 * start offset, local/shared memory and register allocation, grid and block
 * dimensions, emit the launch sequence, and finally mark the 3D constant
 * buffers dirty so they are bound again.
 *
 * pipe:         context; converted with nvc0_context().
 * block_layout: three block dimensions, read as [0], [1], [2].
 * grid_layout:  three grid dimensions, read as [0], [1], [2].
 *
 * NOTE(review): `label` and `input` are used here but their declarations
 * (presumably further parameters) are not visible in this listing, and
 * neither are the locals, braces, error-path structure or several PUSH_DATA
 * operands. Confirm against the complete file.
 */
197 nvc0_launch_grid(struct pipe_context
*pipe
,
198 const uint
*block_layout
, const uint
*grid_layout
,
202 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
203 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
204 struct nvc0_program
*cp
= nvc0
->compprog
;
/* ret is the logical negation of the validate result: nonzero means failure. */
208 ret
= !nvc0_compute_state_validate(nvc0
);
212 nvc0_compute_upload_input(nvc0
, input
);
/* CP_START_ID is taken from nvc0_program_symbol_offset(cp, label). */
214 BEGIN_NVC0(push
, NVC0_COMPUTE(CP_START_ID
), 1);
215 PUSH_DATA (push
, nvc0_program_symbol_offset(cp
, label
));
/*
 * Three words are announced; two are visible: lmem_size passed through
 * align(..., 0x10), and 0x800.
 */
217 BEGIN_NVC0(push
, NVC0_COMPUTE(LOCAL_POS_ALLOC
), 3);
218 PUSH_DATA (push
, align(cp
->cp
.lmem_size
, 0x10));
220 PUSH_DATA (push
, 0x800); /* WARP_CSTACK_SIZE */
/*
 * Three words starting at SHARED_SIZE: smem_size passed through
 * align(..., 0x100), the product of the three block dimensions, and the
 * program's num_barriers.
 */
222 BEGIN_NVC0(push
, NVC0_COMPUTE(SHARED_SIZE
), 3);
223 PUSH_DATA (push
, align(cp
->cp
.smem_size
, 0x100));
224 PUSH_DATA (push
, block_layout
[0] * block_layout
[1] * block_layout
[2]);
225 PUSH_DATA (push
, cp
->num_barriers
);
226 BEGIN_NVC0(push
, NVC0_COMPUTE(CP_GPR_ALLOC
), 1);
227 PUSH_DATA (push
, cp
->num_gprs
);
229 /* grid/block setup */
/*
 * For both grid and block: dimension [1] is shifted to bit 16 and OR-ed with
 * dimension [0]; dimension [2] goes in its own word.
 * NOTE(review): dimension [0] is not masked, so a value >= 1 << 16 would
 * overlap dimension [1] -- confirm that callers bound it.
 */
230 BEGIN_NVC0(push
, NVC0_COMPUTE(GRIDDIM_YX
), 2);
231 PUSH_DATA (push
, (grid_layout
[1] << 16) | grid_layout
[0]);
232 PUSH_DATA (push
, grid_layout
[2]);
233 BEGIN_NVC0(push
, NVC0_COMPUTE(BLOCKDIM_YX
), 2);
234 PUSH_DATA (push
, (block_layout
[1] << 16) | block_layout
[0]);
235 PUSH_DATA (push
, block_layout
[2]);
237 /* launch preliminary setup */
238 BEGIN_NVC0(push
, NVC0_COMPUTE(GRIDID
), 1);
239 PUSH_DATA (push
, 0x1);
240 BEGIN_NVC0(push
, SUBC_COMPUTE(0x036c), 1);
242 BEGIN_NVC0(push
, NVC0_COMPUTE(FLUSH
), 1);
243 PUSH_DATA (push
, NVC0_COMPUTE_FLUSH_GLOBAL
| NVC0_COMPUTE_FLUSH_UNK8
);
245 /* kernel launching */
/* LAUNCH (value 0x1000) is emitted between COMPUTE_BEGIN and COMPUTE_END. */
246 BEGIN_NVC0(push
, NVC0_COMPUTE(COMPUTE_BEGIN
), 1);
248 BEGIN_NVC0(push
, SUBC_COMPUTE(0x0a08), 1);
250 BEGIN_NVC0(push
, NVC0_COMPUTE(LAUNCH
), 1);
251 PUSH_DATA (push
, 0x1000);
252 BEGIN_NVC0(push
, NVC0_COMPUTE(COMPUTE_END
), 1);
254 BEGIN_NVC0(push
, SUBC_COMPUTE(0x0360), 1);
255 PUSH_DATA (push
, 0x1);
257 /* rebind all the 3D constant buffers
258 * (looks like binding a CB on COMPUTE clobbers 3D state) */
259 nvc0
->dirty
|= NVC0_NEW_CONSTBUF
;
/*
 * For each of the 6 values of s, set bit i in constbuf_dirty[s] for every
 * slot i whose constbuf[s][i].u.buf is non-NULL.
 */
260 for (s
= 0; s
< 6; s
++) {
261 for (i
= 0; i
< NVC0_MAX_PIPE_CONSTBUFS
; i
++)
262 if (nvc0
->constbuf
[s
][i
].u
.buf
)
263 nvc0
->constbuf_dirty
[s
] |= 1 << i
;
/* Zero the whole uniform_buffer_bound state. */
265 memset(nvc0
->state
.uniform_buffer_bound
, 0,
266 sizeof(nvc0
->state
.uniform_buffer_bound
));
/* Error report; the condition guarding it is not visible in this listing. */
270 NOUVEAU_ERR("Failed to launch grid !\n");