2 * Copyright 2012 Francisco Jerez
3 * Copyright 2015 Samuel Pitoiset
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "nv50/nv50_context.h"
28 #include "nv50/nv50_compute.xml.h"
30 #include "codegen/nv50_ir_driver.h"
/*
 * Create the compute object for a screen and queue its initial state on
 * the given push buffer.
 *
 * screen: supplies the device and channel (screen->base) as well as the
 *         stack_bo, txc and tls_bo buffers and max_tls_space used below.
 * push:   push buffer that receives every BEGIN_NV04/PUSH_DATA pair.
 *
 * The compute class is picked from dev->chipset, an object of that class is
 * created with nouveau_object_new(), and then the stack, global, texture,
 * code and local memory state is programmed.
 *
 * NOTE(review): this listing looks incomplete - the embedded line numbers
 * skip, and the case labels, return statements, the last argument of
 * nouveau_object_new() and the data words after several BEGIN_NV04 headers
 * are not visible. Confirm those against the full file before relying on
 * this description.
 */
33 nv50_screen_compute_setup(struct nv50_screen
*screen
,
34 struct nouveau_pushbuf
*push
)
36 struct nouveau_device
*dev
= screen
->base
.device
;
37 struct nouveau_object
*chan
= screen
->base
.channel
;
/* The channel's private data is read as a struct nv04_fifo (for ->vram). */
38 struct nv04_fifo
*fifo
= (struct nv04_fifo
*)chan
->data
;
/* First dispatch on the chipset value masked with 0xf0. */
42 switch (dev
->chipset
& 0xf0) {
46 obj_class
= NV50_COMPUTE_CLASS
;
/*
 * Nested dispatch on the unmasked chipset value: one path selects
 * NVA3_COMPUTE_CLASS, the other NV50_COMPUTE_CLASS.
 */
49 switch (dev
->chipset
) {
53 obj_class
= NVA3_COMPUTE_CLASS
;
56 obj_class
= NV50_COMPUTE_CLASS
;
/* Chipsets matched by neither path are reported as unsupported. */
61 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev
->chipset
);
/* Create an object of the selected class on the channel. */
65 ret
= nouveau_object_new(chan
, 0xbeef50c0, obj_class
, NULL
, 0,
/* Push the new object's handle to NV01_SUBCHAN_OBJECT via SUBC_COMPUTE. */
70 BEGIN_NV04(push
, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT
), 1);
71 PUSH_DATA (push
, screen
->compute
->handle
);
73 BEGIN_NV04(push
, NV50_COMPUTE(UNK02A0
), 1);
/* Stack: DMA object is fifo->vram, address comes from screen->stack_bo. */
75 BEGIN_NV04(push
, NV50_COMPUTE(DMA_STACK
), 1);
76 PUSH_DATA (push
, fifo
->vram
);
77 BEGIN_NV04(push
, NV50_COMPUTE(STACK_ADDRESS_HIGH
), 2);
78 PUSH_DATAh(push
, screen
->stack_bo
->offset
);
79 PUSH_DATA (push
, screen
->stack_bo
->offset
);
80 BEGIN_NV04(push
, NV50_COMPUTE(STACK_SIZE_LOG
), 1);
83 BEGIN_NV04(push
, NV50_COMPUTE(UNK0290
), 1);
85 BEGIN_NV04(push
, NV50_COMPUTE(LANES32_ENABLE
), 1);
87 BEGIN_NV04(push
, NV50_COMPUTE(REG_MODE
), 1);
88 PUSH_DATA (push
, NV50_COMPUTE_REG_MODE_STRIPED
);
89 BEGIN_NV04(push
, NV50_COMPUTE(UNK0384
), 1);
90 PUSH_DATA (push
, 0x100);
91 BEGIN_NV04(push
, NV50_COMPUTE(DMA_GLOBAL
), 1);
92 PUSH_DATA (push
, fifo
->vram
);
/*
 * Global slots 0..14: address, limit and mode headers are emitted for each
 * slot; the mode is LINEAR. The address and limit data words are not
 * visible in this listing.
 */
94 for (i
= 0; i
< 15; i
++) {
95 BEGIN_NV04(push
, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i
)), 2);
98 BEGIN_NV04(push
, NV50_COMPUTE(GLOBAL_LIMIT(i
)), 1);
100 BEGIN_NV04(push
, NV50_COMPUTE(GLOBAL_MODE(i
)), 1);
101 PUSH_DATA (push
, NV50_COMPUTE_GLOBAL_MODE_LINEAR
);
/* Slot 15 is programmed separately: limit ~0, mode LINEAR. */
104 BEGIN_NV04(push
, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2);
107 BEGIN_NV04(push
, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1);
108 PUSH_DATA (push
, ~0);
109 BEGIN_NV04(push
, NV50_COMPUTE(GLOBAL_MODE(15)), 1);
110 PUSH_DATA (push
, NV50_COMPUTE_GLOBAL_MODE_LINEAR
);
112 BEGIN_NV04(push
, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC
), 1);
114 BEGIN_NV04(push
, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP
), 1);
116 BEGIN_NV04(push
, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC
), 1);
118 BEGIN_NV04(push
, NV50_COMPUTE(STACK_WARPS_NO_CLAMP
), 1);
120 BEGIN_NV04(push
, NV50_COMPUTE(USER_PARAM_COUNT
), 1);
/*
 * Texture state. TIC and TSC are both addressed relative to screen->txc:
 * TIC at txc->offset, TSC at txc->offset + 65536. Each address pair is
 * followed by the matching NV50_*_MAX_ENTRIES - 1.
 */
123 BEGIN_NV04(push
, NV50_COMPUTE(DMA_TEXTURE
), 1);
124 PUSH_DATA (push
, fifo
->vram
);
125 BEGIN_NV04(push
, NV50_COMPUTE(TEX_LIMITS
), 1);
126 PUSH_DATA (push
, 0x54);
127 BEGIN_NV04(push
, NV50_COMPUTE(LINKED_TSC
), 1);
130 BEGIN_NV04(push
, NV50_COMPUTE(DMA_TIC
), 1);
131 PUSH_DATA (push
, fifo
->vram
);
132 BEGIN_NV04(push
, NV50_COMPUTE(TIC_ADDRESS_HIGH
), 3);
133 PUSH_DATAh(push
, screen
->txc
->offset
);
134 PUSH_DATA (push
, screen
->txc
->offset
);
135 PUSH_DATA (push
, NV50_TIC_MAX_ENTRIES
- 1);
137 BEGIN_NV04(push
, NV50_COMPUTE(DMA_TSC
), 1);
138 PUSH_DATA (push
, fifo
->vram
);
139 BEGIN_NV04(push
, NV50_COMPUTE(TSC_ADDRESS_HIGH
), 3);
140 PUSH_DATAh(push
, screen
->txc
->offset
+ 65536);
141 PUSH_DATA (push
, screen
->txc
->offset
+ 65536);
142 PUSH_DATA (push
, NV50_TSC_MAX_ENTRIES
- 1);
144 BEGIN_NV04(push
, NV50_COMPUTE(DMA_CODE_CB
), 1);
145 PUSH_DATA (push
, fifo
->vram
);
/*
 * Local memory: addressed at screen->tls_bo->offset + 65536; the size is
 * given as log2 of (max_tls_space / ONE_TEMP_SIZE) * 2.
 */
147 BEGIN_NV04(push
, NV50_COMPUTE(DMA_LOCAL
), 1);
148 PUSH_DATA (push
, fifo
->vram
);
149 BEGIN_NV04(push
, NV50_COMPUTE(LOCAL_ADDRESS_HIGH
), 2);
150 PUSH_DATAh(push
, screen
->tls_bo
->offset
+ 65536);
151 PUSH_DATA (push
, screen
->tls_bo
->offset
+ 65536);
152 BEGIN_NV04(push
, NV50_COMPUTE(LOCAL_SIZE_LOG
), 1);
153 PUSH_DATA (push
, util_logbase2((screen
->max_tls_space
/ ONE_TEMP_SIZE
) * 2));
/*
 * Make sure the bound compute program (nv50->compprog) is translated and
 * its code is uploaded.
 *
 * nv50: context whose compprog is validated.
 *
 * NOTE(review): the return statements and the bodies of the failure
 * branches are not visible in this listing, so the result convention is not
 * documented here. nv50_compute_state_validate() tests the result with '!'.
 */
159 nv50_compute_validate_program(struct nv50_context
*nv50
)
161 struct nv50_program
*prog
= nv50
->compprog
;
/* Translate only when prog->translated is still unset. */
166 if (!prog
->translated
) {
167 prog
->translated
= nv50_program_translate(
168 prog
, nv50
->screen
->base
.device
->chipset
, &nv50
->base
.debug
);
/* Re-test the flag after the translation attempt. */
169 if (!prog
->translated
)
172 if (unlikely(!prog
->code_size
))
/* With non-empty code, upload it; on success emit CODE_CB_FLUSH. */
175 if (likely(prog
->code_size
)) {
176 if (nv50_program_upload_code(nv50
, prog
)) {
177 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
178 BEGIN_NV04(push
, NV50_COMPUTE(CODE_CB_FLUSH
), 1);
/*
 * Add every resource stored in nv50->global_residents to the compute
 * buffer context (nv50->bufctx_cp).
 *
 * nv50: context that owns global_residents and bufctx_cp.
 *
 * global_residents is read as an array of struct pipe_resource pointers;
 * the element count is its byte size divided by the pointer size.
 *
 * NOTE(review): the loop increment is not visible in this listing.
 */
187 nv50_compute_validate_globals(struct nv50_context
*nv50
)
191 for (i
= 0; i
< nv50
->global_residents
.size
/ sizeof(struct pipe_resource
*);
193 struct pipe_resource
*res
= *util_dynarray_element(
194 &nv50
->global_residents
, struct pipe_resource
*, i
);
/* Registered under NV50_BIND_CP_GLOBAL with NOUVEAU_BO_RDWR access. */
196 nv50_add_bufctx_resident(nv50
->bufctx_cp
, NV50_BIND_CP_GLOBAL
,
197 nv04_resource(res
), NOUVEAU_BO_RDWR
);
/*
 * Validate the compute state before a launch: the program, then the global
 * residents when flagged dirty, then the push buffer against bufctx_cp.
 *
 * nv50: context to validate.
 *
 * NOTE(review): the statements taken on failure and the return values are
 * not visible in this listing. nv50_launch_grid() negates the result of
 * this function to detect failure.
 */
202 nv50_compute_state_validate(struct nv50_context
*nv50
)
204 if (!nv50_compute_validate_program(nv50
))
/* Globals are revalidated only when NV50_NEW_CP_GLOBALS is set. */
207 if (nv50
->dirty_cp
& NV50_NEW_CP_GLOBALS
)
208 nv50_compute_validate_globals(nv50
);
210 /* TODO: validate textures, samplers, surfaces */
212 nv50_bufctx_fence(nv50
->bufctx_cp
, false);
/* Attach bufctx_cp to the context push buffer, then validate it. */
214 nouveau_pushbuf_bufctx(nv50
->base
.pushbuf
, nv50
->bufctx_cp
);
215 if (unlikely(nouveau_pushbuf_validate(nv50
->base
.pushbuf
)))
/* If state.flushed is set, fence bufctx_cp again, this time with 'true'. */
217 if (unlikely(nv50
->state
.flushed
))
218 nv50_bufctx_fence(nv50
->bufctx_cp
, true);
/*
 * Upload the kernel input buffer as USER_PARAM data.
 *
 * nv50:  context; compprog->parm_size gives the input size.
 * input: source words; 'size' bytes are copied from it.
 *
 * The size is parm_size aligned to 4. The input is staged in a temporary
 * allocation from screen->base.mm_GART and handed to the push buffer with
 * nouveau_pushbuf_data(). The allocation is released through
 * nouveau_fence_work() on the current fence.
 *
 * NOTE(review): any guard or check around the allocation (for example for
 * size == 0 or a failed nouveau_mm_allocate) is not visible in this
 * listing. In the visible code the results of nouveau_bo_map() and
 * nouveau_pushbuf_validate() are not used.
 */
224 nv50_compute_upload_input(struct nv50_context
*nv50
, const uint32_t *input
)
226 struct nv50_screen
*screen
= nv50
->screen
;
227 struct nouveau_pushbuf
*push
= screen
->base
.pushbuf
;
228 unsigned size
= align(nv50
->compprog
->parm_size
, 0x4);
/* The count is written in 32-bit words, shifted left by 8. */
230 BEGIN_NV04(push
, NV50_COMPUTE(USER_PARAM_COUNT
), 1);
231 PUSH_DATA (push
, (size
/ 4) << 8);
234 struct nouveau_mm_allocation
*mm
;
235 struct nouveau_bo
*bo
= NULL
;
/* Stage the input: allocate, map, and copy 'size' bytes at 'offset'. */
238 mm
= nouveau_mm_allocate(screen
->base
.mm_GART
, size
, &bo
, &offset
);
241 nouveau_bo_map(bo
, 0, screen
->base
.client
);
242 memcpy(bo
->map
+ offset
, input
, size
);
/* Reference the staging bo in nv50->bufctx (bin 0) as GART, read-only. */
244 nouveau_bufctx_refn(nv50
->bufctx
, 0, bo
, NOUVEAU_BO_GART
| NOUVEAU_BO_RD
);
245 nouveau_pushbuf_bufctx(push
, nv50
->bufctx
);
246 nouveau_pushbuf_validate(push
);
/* Header for size / 4 words of USER_PARAM, with the data taken from bo. */
248 BEGIN_NV04(push
, NV50_COMPUTE(USER_PARAM(0)), size
/ 4);
249 nouveau_pushbuf_data(push
, bo
, offset
, size
);
/* Defer freeing 'mm' to fence work, then drop the local bo reference. */
251 nouveau_fence_work(screen
->base
.fence
.current
, nouveau_mm_free_work
, mm
);
252 nouveau_bo_ref(NULL
, &bo
);
253 nouveau_bufctx_reset(nv50
->bufctx
, 0);
/*
 * Look up the code address of a symbol in the bound compute program.
 *
 * nv50:  context; nv50->compprog is searched.
 * label: symbol label to match against syms[i].label.
 *
 * Returns prog->code_base plus the offset of the first symbol whose label
 * matches, or prog->code_base alone when the program has no symbols or no
 * label matches.
 */
258 nv50_compute_find_symbol(struct nv50_context
*nv50
, uint32_t label
)
260 struct nv50_program
*prog
= nv50
->compprog
;
/* cp.syms is accessed as an array of struct nv50_ir_prog_symbol. */
261 const struct nv50_ir_prog_symbol
*syms
=
262 (const struct nv50_ir_prog_symbol
*)prog
->cp
.syms
;
/* Linear scan over the cp.num_syms entries; the first match wins. */
265 for (i
= 0; i
< prog
->cp
.num_syms
; ++i
) {
266 if (syms
[i
].label
== label
)
267 return prog
->code_base
+ syms
[i
].offset
;
269 return prog
->code_base
; /* no symbols or symbol not found */
/*
 * Launch a compute grid on the context behind 'pipe'.
 *
 * pipe: pipe context, converted with nv50_context().
 * info: grid description; this function reads info->block[0..2],
 *       info->grid[0..1], info->input and info->pc.
 *
 * Sequence: validate compute state, upload the kernel input, program the
 * start address, shared size, register allocation and block/grid
 * dimensions, then emit LAUNCH and NV50_GRAPH_SERIALIZE. Finally the
 * fragment program is marked dirty.
 *
 * NOTE(review): the condition guarding the error message and what follows
 * it are not visible in this listing, and neither are the data words of
 * BLOCKDIM_LATCH, GRIDID, LAUNCH and NV50_GRAPH_SERIALIZE. Only grid[0] and
 * grid[1] appear in the visible GRIDDIM write.
 */
273 nv50_launch_grid(struct pipe_context
*pipe
, const struct pipe_grid_info
*info
)
275 struct nv50_context
*nv50
= nv50_context(pipe
);
276 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
/* Product of the three block dimensions. */
277 unsigned block_size
= info
->block
[0] * info
->block
[1] * info
->block
[2];
278 struct nv50_program
*cp
= nv50
->compprog
;
/* ret is the negation of the validation result. */
281 ret
= !nv50_compute_state_validate(nv50
);
283 NOUVEAU_ERR("Failed to launch grid !\n");
287 nv50_compute_upload_input(nv50
, info
->input
);
/* Start address is resolved from info->pc through the symbol table. */
289 BEGIN_NV04(push
, NV50_COMPUTE(CP_START_ID
), 1);
290 PUSH_DATA (push
, nv50_compute_find_symbol(nv50
, info
->pc
));
/* Shared size: smem_size + parm_size + 0x10, aligned to 0x40. */
292 BEGIN_NV04(push
, NV50_COMPUTE(SHARED_SIZE
), 1);
293 PUSH_DATA (push
, align(cp
->cp
.smem_size
+ cp
->parm_size
+ 0x10, 0x40));
294 BEGIN_NV04(push
, NV50_COMPUTE(CP_REG_ALLOC_TEMP
), 1);
295 PUSH_DATA (push
, cp
->max_gpr
);
297 /* grid/block setup */
/* First word: block[1] in the upper 16 bits, block[0] in the lower ones. */
298 BEGIN_NV04(push
, NV50_COMPUTE(BLOCKDIM_XY
), 2);
299 PUSH_DATA (push
, info
->block
[1] << 16 | info
->block
[0]);
300 PUSH_DATA (push
, info
->block
[2]);
301 BEGIN_NV04(push
, NV50_COMPUTE(BLOCK_ALLOC
), 1);
302 PUSH_DATA (push
, 1 << 16 | block_size
);
303 BEGIN_NV04(push
, NV50_COMPUTE(BLOCKDIM_LATCH
), 1);
/* grid[1] in the upper 16 bits, grid[0] in the lower ones. */
305 BEGIN_NV04(push
, NV50_COMPUTE(GRIDDIM
), 1);
306 PUSH_DATA (push
, info
->grid
[1] << 16 | info
->grid
[0]);
307 BEGIN_NV04(push
, NV50_COMPUTE(GRIDID
), 1);
310 /* kernel launching */
311 BEGIN_NV04(push
, NV50_COMPUTE(LAUNCH
), 1);
313 BEGIN_NV04(push
, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE
), 1);
316 /* bind a compute shader clobbers fragment shader state */
317 nv50
->dirty
|= NV50_NEW_FRAGPROG
;