nouveau: use bool instead of boolean
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_compute.c
1 /*
2 * Copyright 2013 Nouveau Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Christoph Bumiller, Samuel Pitoiset
23 */
24
25 #include "nvc0/nvc0_context.h"
26 #include "nvc0/nvc0_compute.h"
27
28 int
29 nvc0_screen_compute_setup(struct nvc0_screen *screen,
30 struct nouveau_pushbuf *push)
31 {
32 struct nouveau_object *chan = screen->base.channel;
33 struct nouveau_device *dev = screen->base.device;
34 uint32_t obj_class;
35 int ret;
36 int i;
37
38 switch (dev->chipset & ~0xf) {
39 case 0xc0:
40 if (dev->chipset == 0xc8)
41 obj_class = NVC8_COMPUTE_CLASS;
42 else
43 obj_class = NVC0_COMPUTE_CLASS;
44 break;
45 case 0xd0:
46 obj_class = NVC0_COMPUTE_CLASS;
47 break;
48 default:
49 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
50 return -1;
51 }
52
53 ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
54 &screen->compute);
55 if (ret) {
56 NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
57 return ret;
58 }
59
60 ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,
61 &screen->parm);
62 if (ret)
63 return ret;
64
65 BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
66 PUSH_DATA (push, screen->compute->oclass);
67
68 /* hardware limit */
69 BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
70 PUSH_DATA (push, screen->mp_count);
71 BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
72 PUSH_DATA (push, 0xf);
73
74 BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
75 PUSH_DATA (push, 0x8000);
76
77 /* global memory setup */
78 BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
79 PUSH_DATA (push, 0);
80 BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
81 for (i = 0; i <= 0xff; i++)
82 PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
83 BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
84 PUSH_DATA (push, 1);
85
86 /* local memory and cstack setup */
87 BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
88 PUSH_DATAh(push, screen->tls->offset);
89 PUSH_DATA (push, screen->tls->offset);
90 BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
91 PUSH_DATAh(push, screen->tls->size);
92 PUSH_DATA (push, screen->tls->size);
93 BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
94 PUSH_DATA (push, 0);
95 BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
96 PUSH_DATA (push, 1 << 24);
97
98 /* shared memory setup */
99 BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
100 PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
101 BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
102 PUSH_DATA (push, 2 << 24);
103 BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
104 PUSH_DATA (push, 0);
105
106 /* code segment setup */
107 BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
108 PUSH_DATAh(push, screen->text->offset);
109 PUSH_DATA (push, screen->text->offset);
110
111 /* bind parameters buffer */
112 BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
113 PUSH_DATA (push, screen->parm->size);
114 PUSH_DATAh(push, screen->parm->offset);
115 PUSH_DATA (push, screen->parm->offset);
116 BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
117 PUSH_DATA (push, (0 << 8) | 1);
118
119 /* TODO: textures & samplers */
120
121 return 0;
122 }
123
124 bool
125 nvc0_compute_validate_program(struct nvc0_context *nvc0)
126 {
127 struct nvc0_program *prog = nvc0->compprog;
128
129 if (prog->mem)
130 return true;
131
132 if (!prog->translated) {
133 prog->translated = nvc0_program_translate(
134 prog, nvc0->screen->base.device->chipset);
135 if (!prog->translated)
136 return false;
137 }
138 if (unlikely(!prog->code_size))
139 return false;
140
141 if (likely(prog->code_size)) {
142 if (nvc0_program_upload_code(nvc0, prog)) {
143 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
144 BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
145 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
146 return true;
147 }
148 }
149 return false;
150 }
151
152 static bool
153 nvc0_compute_state_validate(struct nvc0_context *nvc0)
154 {
155 if (!nvc0_compute_validate_program(nvc0))
156 return false;
157
158 /* TODO: textures, samplers, surfaces, global memory buffers */
159
160 nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
161
162 nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
163 if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
164 return false;
165 if (unlikely(nvc0->state.flushed))
166 nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
167
168 return true;
169
170 }
171
172 static void
173 nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
174 {
175 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
176 struct nvc0_screen *screen = nvc0->screen;
177 struct nvc0_program *cp = nvc0->compprog;
178
179 if (cp->parm_size) {
180 BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
181 PUSH_DATA (push, align(cp->parm_size, 0x100));
182 PUSH_DATAh(push, screen->parm->offset);
183 PUSH_DATA (push, screen->parm->offset);
184 BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
185 PUSH_DATA (push, (0 << 8) | 1);
186 /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
187 BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4);
188 PUSH_DATA (push, 0);
189 PUSH_DATAp(push, input, cp->parm_size / 4);
190
191 BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
192 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
193 }
194 }
195
196 void
197 nvc0_launch_grid(struct pipe_context *pipe,
198 const uint *block_layout, const uint *grid_layout,
199 uint32_t label,
200 const void *input)
201 {
202 struct nvc0_context *nvc0 = nvc0_context(pipe);
203 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
204 struct nvc0_program *cp = nvc0->compprog;
205 unsigned s, i;
206 int ret;
207
208 ret = !nvc0_compute_state_validate(nvc0);
209 if (ret)
210 goto out;
211
212 nvc0_compute_upload_input(nvc0, input);
213
214 BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
215 PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
216
217 BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
218 PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
219 PUSH_DATA (push, 0);
220 PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
221
222 BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
223 PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
224 PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
225 PUSH_DATA (push, cp->num_barriers);
226 BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
227 PUSH_DATA (push, cp->num_gprs);
228
229 /* grid/block setup */
230 BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
231 PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
232 PUSH_DATA (push, grid_layout[2]);
233 BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
234 PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
235 PUSH_DATA (push, block_layout[2]);
236
237 /* launch preliminary setup */
238 BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
239 PUSH_DATA (push, 0x1);
240 BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
241 PUSH_DATA (push, 0);
242 BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
243 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
244
245 /* kernel launching */
246 BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
247 PUSH_DATA (push, 0);
248 BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
249 PUSH_DATA (push, 0);
250 BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
251 PUSH_DATA (push, 0x1000);
252 BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
253 PUSH_DATA (push, 0);
254 BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
255 PUSH_DATA (push, 0x1);
256
257 /* rebind all the 3D constant buffers
258 * (looks like binding a CB on COMPUTE clobbers 3D state) */
259 nvc0->dirty |= NVC0_NEW_CONSTBUF;
260 for (s = 0; s < 6; s++) {
261 for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
262 if (nvc0->constbuf[s][i].u.buf)
263 nvc0->constbuf_dirty[s] |= 1 << i;
264 }
265 memset(nvc0->state.uniform_buffer_bound, 0,
266 sizeof(nvc0->state.uniform_buffer_bound));
267
268 out:
269 if (ret)
270 NOUVEAU_ERR("Failed to launch grid !\n");
271 }