llvmpipe: add compute shader generation.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_cs.c
1 /**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25 #include "util/u_memory.h"
26 #include "util/simple_list.h"
27 #include "util/os_time.h"
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "gallivm/lp_bld_const.h"
31 #include "gallivm/lp_bld_debug.h"
32 #include "gallivm/lp_bld_intr.h"
33 #include "gallivm/lp_bld_flow.h"
34 #include "gallivm/lp_bld_gather.h"
35 #include "gallivm/lp_bld_coro.h"
36 #include "lp_state_cs.h"
37 #include "lp_context.h"
38 #include "lp_debug.h"
39 #include "lp_state.h"
40 #include "lp_perf.h"
41
42 static void
43 generate_compute(struct llvmpipe_context *lp,
44 struct lp_compute_shader *shader,
45 struct lp_compute_shader_variant *variant)
46 {
47 struct gallivm_state *gallivm = variant->gallivm;
48 char func_name[64], func_name_coro[64];
49 LLVMTypeRef arg_types[13];
50 LLVMTypeRef func_type, coro_func_type;
51 LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
52 LLVMValueRef context_ptr;
53 LLVMValueRef x_size_arg, y_size_arg, z_size_arg;
54 LLVMValueRef grid_x_arg, grid_y_arg, grid_z_arg;
55 LLVMValueRef grid_size_x_arg, grid_size_y_arg, grid_size_z_arg;
56 LLVMValueRef thread_data_ptr;
57 LLVMBasicBlockRef block;
58 LLVMBuilderRef builder;
59 LLVMValueRef function, coro;
60 struct lp_type cs_type;
61 unsigned i;
62
63 /*
64 * This function has two parts
65 * a) setup the coroutine execution environment loop.
66 * b) build the compute shader llvm for use inside the coroutine.
67 */
68 assert(lp_native_vector_width / 32 >= 4);
69
70 memset(&cs_type, 0, sizeof cs_type);
71 cs_type.floating = TRUE; /* floating point values */
72 cs_type.sign = TRUE; /* values are signed */
73 cs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
74 cs_type.width = 32; /* 32-bit float */
75 cs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */
76 snprintf(func_name, sizeof(func_name), "cs%u_variant%u",
77 shader->no, variant->no);
78
79 snprintf(func_name_coro, sizeof(func_name), "cs_co_%u_variant%u",
80 shader->no, variant->no);
81
82 arg_types[0] = variant->jit_cs_context_ptr_type; /* context */
83 arg_types[1] = int32_type; /* block_x_size */
84 arg_types[2] = int32_type; /* block_y_size */
85 arg_types[3] = int32_type; /* block_z_size */
86 arg_types[4] = int32_type; /* grid_x */
87 arg_types[5] = int32_type; /* grid_y */
88 arg_types[6] = int32_type; /* grid_z */
89 arg_types[7] = int32_type; /* grid_size_x */
90 arg_types[8] = int32_type; /* grid_size_y */
91 arg_types[9] = int32_type; /* grid_size_z */
92 arg_types[10] = variant->jit_cs_thread_data_ptr_type; /* per thread data */
93 arg_types[11] = int32_type;
94 arg_types[12] = int32_type;
95 func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
96 arg_types, ARRAY_SIZE(arg_types) - 2, 0);
97
98 coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
99 arg_types, ARRAY_SIZE(arg_types), 0);
100
101 function = LLVMAddFunction(gallivm->module, func_name, func_type);
102 LLVMSetFunctionCallConv(function, LLVMCCallConv);
103
104 coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
105 LLVMSetFunctionCallConv(coro, LLVMCCallConv);
106
107 variant->function = function;
108
109 for(i = 0; i < ARRAY_SIZE(arg_types); ++i) {
110 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
111 lp_add_function_attr(coro, i + 1, LP_FUNC_ATTR_NOALIAS);
112 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
113 }
114 }
115
116 context_ptr = LLVMGetParam(function, 0);
117 x_size_arg = LLVMGetParam(function, 1);
118 y_size_arg = LLVMGetParam(function, 2);
119 z_size_arg = LLVMGetParam(function, 3);
120 grid_x_arg = LLVMGetParam(function, 4);
121 grid_y_arg = LLVMGetParam(function, 5);
122 grid_z_arg = LLVMGetParam(function, 6);
123 grid_size_x_arg = LLVMGetParam(function, 7);
124 grid_size_y_arg = LLVMGetParam(function, 8);
125 grid_size_z_arg = LLVMGetParam(function, 9);
126 thread_data_ptr = LLVMGetParam(function, 10);
127
128 lp_build_name(context_ptr, "context");
129 lp_build_name(x_size_arg, "x_size");
130 lp_build_name(y_size_arg, "y_size");
131 lp_build_name(z_size_arg, "z_size");
132 lp_build_name(grid_x_arg, "grid_x");
133 lp_build_name(grid_y_arg, "grid_y");
134 lp_build_name(grid_z_arg, "grid_z");
135 lp_build_name(grid_size_x_arg, "grid_size_x");
136 lp_build_name(grid_size_y_arg, "grid_size_y");
137 lp_build_name(grid_size_z_arg, "grid_size_z");
138 lp_build_name(thread_data_ptr, "thread_data");
139
140 block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
141 builder = gallivm->builder;
142 assert(builder);
143 LLVMPositionBuilderAtEnd(builder, block);
144
145 struct lp_build_loop_state loop_state[4];
146 LLVMValueRef num_x_loop;
147 LLVMValueRef vec_length = lp_build_const_int32(gallivm, cs_type.length);
148 num_x_loop = LLVMBuildAdd(gallivm->builder, x_size_arg, vec_length, "");
149 num_x_loop = LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), "");
150 num_x_loop = LLVMBuildUDiv(gallivm->builder, num_x_loop, vec_length, "");
151 LLVMValueRef partials = LLVMBuildURem(gallivm->builder, x_size_arg, vec_length, "");
152
153 LLVMValueRef coro_num_hdls = LLVMBuildMul(gallivm->builder, num_x_loop, y_size_arg, "");
154 coro_num_hdls = LLVMBuildMul(gallivm->builder, coro_num_hdls, z_size_arg, "");
155
156 LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
157 LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, coro_num_hdls, "coro_hdls");
158
159 unsigned end_coroutine = INT_MAX;
160
161 /*
162 * This is the main coroutine execution loop. It iterates over the dimensions
163 * and calls the coroutine main entrypoint on the first pass, but in subsequent
164 * passes it checks if the coroutine has completed and resumes it if not.
165 */
166 /* take x_width - round up to type.length width */
167 lp_build_loop_begin(&loop_state[3], gallivm,
168 lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
169 lp_build_loop_begin(&loop_state[2], gallivm,
170 lp_build_const_int32(gallivm, 0)); /* z loop */
171 lp_build_loop_begin(&loop_state[1], gallivm,
172 lp_build_const_int32(gallivm, 0)); /* y loop */
173 lp_build_loop_begin(&loop_state[0], gallivm,
174 lp_build_const_int32(gallivm, 0)); /* x loop */
175 {
176 LLVMValueRef args[13];
177 args[0] = context_ptr;
178 args[1] = loop_state[0].counter;
179 args[2] = loop_state[1].counter;
180 args[3] = loop_state[2].counter;
181 args[4] = grid_x_arg;
182 args[5] = grid_y_arg;
183 args[6] = grid_z_arg;
184 args[7] = grid_size_x_arg;
185 args[8] = grid_size_y_arg;
186 args[9] = grid_size_z_arg;
187 args[10] = thread_data_ptr;
188 args[11] = num_x_loop;
189 args[12] = partials;
190
191 /* idx = (z * (size_x * size_y) + y * size_x + x */
192 LLVMValueRef coro_hdl_idx = LLVMBuildMul(gallivm->builder, loop_state[2].counter,
193 LLVMBuildMul(gallivm->builder, num_x_loop, y_size_arg, ""), "");
194 coro_hdl_idx = LLVMBuildAdd(gallivm->builder, coro_hdl_idx,
195 LLVMBuildMul(gallivm->builder, loop_state[1].counter,
196 num_x_loop, ""), "");
197 coro_hdl_idx = LLVMBuildAdd(gallivm->builder, coro_hdl_idx,
198 loop_state[0].counter, "");
199
200 LLVMValueRef coro_entry = LLVMBuildGEP(gallivm->builder, coro_hdls, &coro_hdl_idx, 1, "");
201
202 LLVMValueRef coro_hdl = LLVMBuildLoad(gallivm->builder, coro_entry, "coro_hdl");
203
204 struct lp_build_if_state ifstate;
205 LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[3].counter,
206 lp_build_const_int32(gallivm, 0), "");
207 /* first time here - call the coroutine function entry point */
208 lp_build_if(&ifstate, gallivm, cmp);
209 LLVMValueRef coro_ret = LLVMBuildCall(gallivm->builder, coro, args, 13, "");
210 LLVMBuildStore(gallivm->builder, coro_ret, coro_entry);
211 lp_build_else(&ifstate);
212 /* subsequent calls for this invocation - check if done. */
213 LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
214 struct lp_build_if_state ifstate2;
215 lp_build_if(&ifstate2, gallivm, coro_done);
216 /* if done destroy and force loop exit */
217 lp_build_coro_destroy(gallivm, coro_hdl);
218 lp_build_loop_force_set_counter(&loop_state[3], lp_build_const_int32(gallivm, end_coroutine - 1));
219 lp_build_else(&ifstate2);
220 /* otherwise resume the coroutine */
221 lp_build_coro_resume(gallivm, coro_hdl);
222 lp_build_endif(&ifstate2);
223 lp_build_endif(&ifstate);
224 lp_build_loop_force_reload_counter(&loop_state[3]);
225 }
226 lp_build_loop_end_cond(&loop_state[0],
227 num_x_loop,
228 NULL, LLVMIntUGE);
229 lp_build_loop_end_cond(&loop_state[1],
230 y_size_arg,
231 NULL, LLVMIntUGE);
232 lp_build_loop_end_cond(&loop_state[2],
233 z_size_arg,
234 NULL, LLVMIntUGE);
235 lp_build_loop_end_cond(&loop_state[3],
236 lp_build_const_int32(gallivm, end_coroutine),
237 NULL, LLVMIntEQ);
238 LLVMBuildRetVoid(builder);
239
240 /* This is stage (b) - generate the compute shader code inside the coroutine. */
241 context_ptr = LLVMGetParam(coro, 0);
242 x_size_arg = LLVMGetParam(coro, 1);
243 y_size_arg = LLVMGetParam(coro, 2);
244 z_size_arg = LLVMGetParam(coro, 3);
245 grid_x_arg = LLVMGetParam(coro, 4);
246 grid_y_arg = LLVMGetParam(coro, 5);
247 grid_z_arg = LLVMGetParam(coro, 6);
248 grid_size_x_arg = LLVMGetParam(coro, 7);
249 grid_size_y_arg = LLVMGetParam(coro, 8);
250 grid_size_z_arg = LLVMGetParam(coro, 9);
251 thread_data_ptr = LLVMGetParam(coro, 10);
252 num_x_loop = LLVMGetParam(coro, 11);
253 partials = LLVMGetParam(coro, 12);
254 block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "entry");
255 LLVMPositionBuilderAtEnd(builder, block);
256 {
257 const struct tgsi_token *tokens = shader->base.tokens;
258 LLVMValueRef consts_ptr, num_consts_ptr;
259 LLVMValueRef ssbo_ptr, num_ssbo_ptr;
260 LLVMValueRef shared_ptr;
261 struct lp_build_mask_context mask;
262 struct lp_bld_tgsi_system_values system_values;
263
264 memset(&system_values, 0, sizeof(system_values));
265 consts_ptr = lp_jit_cs_context_constants(gallivm, context_ptr);
266 num_consts_ptr = lp_jit_cs_context_num_constants(gallivm, context_ptr);
267 ssbo_ptr = lp_jit_cs_context_ssbos(gallivm, context_ptr);
268 num_ssbo_ptr = lp_jit_cs_context_num_ssbos(gallivm, context_ptr);
269 shared_ptr = lp_jit_cs_thread_data_shared(gallivm, thread_data_ptr);
270
271 /* these are coroutine entrypoint necessities */
272 LLVMValueRef coro_id = lp_build_coro_id(gallivm);
273 LLVMValueRef coro_hdl = lp_build_coro_begin_alloc_mem(gallivm, coro_id);
274
275 LLVMValueRef has_partials = LLVMBuildICmp(gallivm->builder, LLVMIntNE, partials, lp_build_const_int32(gallivm, 0), "");
276 LLVMValueRef tid_vals[3];
277 LLVMValueRef tids_x[LP_MAX_VECTOR_LENGTH], tids_y[LP_MAX_VECTOR_LENGTH], tids_z[LP_MAX_VECTOR_LENGTH];
278 LLVMValueRef base_val = LLVMBuildMul(gallivm->builder, x_size_arg, vec_length, "");
279 for (i = 0; i < cs_type.length; i++) {
280 tids_x[i] = LLVMBuildAdd(gallivm->builder, base_val, lp_build_const_int32(gallivm, i), "");
281 tids_y[i] = y_size_arg;
282 tids_z[i] = z_size_arg;
283 }
284 tid_vals[0] = lp_build_gather_values(gallivm, tids_x, cs_type.length);
285 tid_vals[1] = lp_build_gather_values(gallivm, tids_y, cs_type.length);
286 tid_vals[2] = lp_build_gather_values(gallivm, tids_z, cs_type.length);
287 system_values.thread_id = LLVMGetUndef(LLVMArrayType(LLVMVectorType(int32_type, cs_type.length), 3));
288 for (i = 0; i < 3; i++)
289 system_values.thread_id = LLVMBuildInsertValue(builder, system_values.thread_id, tid_vals[i], i, "");
290
291 LLVMValueRef gtids[3] = { grid_x_arg, grid_y_arg, grid_z_arg };
292 system_values.block_id = LLVMGetUndef(LLVMVectorType(int32_type, 3));
293 for (i = 0; i < 3; i++)
294 system_values.block_id = LLVMBuildInsertElement(builder, system_values.block_id, gtids[i], lp_build_const_int32(gallivm, i), "");
295
296 LLVMValueRef gstids[3] = { grid_size_x_arg, grid_size_y_arg, grid_size_z_arg };
297 system_values.grid_size = LLVMGetUndef(LLVMVectorType(int32_type, 3));
298 for (i = 0; i < 3; i++)
299 system_values.grid_size = LLVMBuildInsertElement(builder, system_values.grid_size, gstids[i], lp_build_const_int32(gallivm, i), "");
300
301 LLVMValueRef last_x_loop = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, x_size_arg, LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), ""), "");
302 LLVMValueRef use_partial_mask = LLVMBuildAnd(gallivm->builder, last_x_loop, has_partials, "");
303 struct lp_build_if_state if_state;
304 LLVMValueRef mask_val = lp_build_alloca(gallivm, LLVMVectorType(int32_type, cs_type.length), "mask");
305 LLVMValueRef full_mask_val = lp_build_const_int_vec(gallivm, cs_type, ~0);
306 LLVMBuildStore(gallivm->builder, full_mask_val, mask_val);
307
308 lp_build_if(&if_state, gallivm, use_partial_mask);
309 struct lp_build_loop_state mask_loop_state;
310 lp_build_loop_begin(&mask_loop_state, gallivm, partials);
311 LLVMValueRef tmask_val = LLVMBuildLoad(gallivm->builder, mask_val, "");
312 tmask_val = LLVMBuildInsertElement(gallivm->builder, tmask_val, lp_build_const_int32(gallivm, 0), mask_loop_state.counter, "");
313 LLVMBuildStore(gallivm->builder, tmask_val, mask_val);
314 lp_build_loop_end_cond(&mask_loop_state, vec_length, NULL, LLVMIntUGE);
315 lp_build_endif(&if_state);
316
317 mask_val = LLVMBuildLoad(gallivm->builder, mask_val, "");
318 lp_build_mask_begin(&mask, gallivm, cs_type, mask_val);
319
320 struct lp_build_coro_suspend_info coro_info;
321
322 LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "suspend");
323 LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "cleanup");
324
325 coro_info.suspend = sus_block;
326 coro_info.cleanup = clean_block;
327
328 struct lp_build_tgsi_params params;
329 memset(&params, 0, sizeof(params));
330
331 params.type = cs_type;
332 params.mask = &mask;
333 params.consts_ptr = consts_ptr;
334 params.const_sizes_ptr = num_consts_ptr;
335 params.system_values = &system_values;
336 params.context_ptr = context_ptr;
337 params.info = &shader->info.base;
338 params.ssbo_ptr = ssbo_ptr;
339 params.ssbo_sizes_ptr = num_ssbo_ptr;
340 params.shared_ptr = shared_ptr;
341 params.coro = &coro_info;
342
343 lp_build_tgsi_soa(gallivm, tokens, &params, NULL);
344
345 mask_val = lp_build_mask_end(&mask);
346
347 lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
348 LLVMPositionBuilderAtEnd(builder, clean_block);
349
350 lp_build_coro_free_mem(gallivm, coro_id, coro_hdl);
351
352 LLVMBuildBr(builder, sus_block);
353 LLVMPositionBuilderAtEnd(builder, sus_block);
354
355 lp_build_coro_end(gallivm, coro_hdl);
356 LLVMBuildRet(builder, coro_hdl);
357 }
358
359 gallivm_verify_function(gallivm, coro);
360 gallivm_verify_function(gallivm, function);
361 }
362
363 static void *
364 llvmpipe_create_compute_state(struct pipe_context *pipe,
365 const struct pipe_compute_state *templ)
366 {
367 struct lp_compute_shader *shader;
368
369 shader = CALLOC_STRUCT(lp_compute_shader);
370 if (!shader)
371 return NULL;
372
373 assert(templ->ir_type == PIPE_SHADER_IR_TGSI);
374 shader->base.tokens = tgsi_dup_tokens(templ->prog);
375
376 lp_build_tgsi_info(shader->base.tokens, &shader->info);
377 make_empty_list(&shader->variants);
378
379 return shader;
380 }
381
382 static void
383 llvmpipe_bind_compute_state(struct pipe_context *pipe,
384 void *cs)
385 {
386 struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
387
388 if (llvmpipe->cs == cs)
389 return;
390
391 llvmpipe->cs = (struct lp_compute_shader *)cs;
392 llvmpipe->cs_dirty |= LP_CSNEW_CS;
393 }
394
395 /**
396 * Remove shader variant from two lists: the shader's variant list
397 * and the context's variant list.
398 */
399 static void
400 llvmpipe_remove_cs_shader_variant(struct llvmpipe_context *lp,
401 struct lp_compute_shader_variant *variant)
402 {
403 if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
404 debug_printf("llvmpipe: del cs #%u var %u v created %u v cached %u "
405 "v total cached %u inst %u total inst %u\n",
406 variant->shader->no, variant->no,
407 variant->shader->variants_created,
408 variant->shader->variants_cached,
409 lp->nr_cs_variants, variant->nr_instrs, lp->nr_cs_instrs);
410 }
411
412 gallivm_destroy(variant->gallivm);
413
414 /* remove from shader's list */
415 remove_from_list(&variant->list_item_local);
416 variant->shader->variants_cached--;
417
418 /* remove from context's list */
419 remove_from_list(&variant->list_item_global);
420 lp->nr_fs_variants--;
421 lp->nr_fs_instrs -= variant->nr_instrs;
422
423 FREE(variant);
424 }
425
426 static void
427 llvmpipe_delete_compute_state(struct pipe_context *pipe,
428 void *cs)
429 {
430 struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
431 struct lp_compute_shader *shader = cs;
432 struct lp_cs_variant_list_item *li;
433
434 /* Delete all the variants */
435 li = first_elem(&shader->variants);
436 while(!at_end(&shader->variants, li)) {
437 struct lp_cs_variant_list_item *next = next_elem(li);
438 llvmpipe_remove_cs_shader_variant(llvmpipe, li->base);
439 li = next;
440 }
441 tgsi_free_tokens(shader->base.tokens);
442 FREE(shader);
443 }
444
445 static void
446 make_variant_key(struct llvmpipe_context *lp,
447 struct lp_compute_shader *shader,
448 struct lp_compute_shader_variant_key *key)
449 {
450 memset(key, 0, shader->variant_key_size);
451 }
452
/**
 * Debug helper: print the address of a compute shader variant key.
 */
static void
dump_cs_variant_key(const struct lp_compute_shader_variant_key *key)
{
   void *key_addr = (void *) key;

   debug_printf("cs variant %p:\n", key_addr);
}
458
459 static void
460 lp_debug_cs_variant(const struct lp_compute_shader_variant *variant)
461 {
462 debug_printf("llvmpipe: Compute shader #%u variant #%u:\n",
463 variant->shader->no, variant->no);
464 tgsi_dump(variant->shader->base.tokens, 0);
465 dump_cs_variant_key(&variant->key);
466 debug_printf("\n");
467 }
468
469 static struct lp_compute_shader_variant *
470 generate_variant(struct llvmpipe_context *lp,
471 struct lp_compute_shader *shader,
472 const struct lp_compute_shader_variant_key *key)
473 {
474 struct lp_compute_shader_variant *variant;
475 char module_name[64];
476
477 variant = CALLOC_STRUCT(lp_compute_shader_variant);
478 if (!variant)
479 return NULL;
480
481 snprintf(module_name, sizeof(module_name), "cs%u_variant%u",
482 shader->no, shader->variants_created);
483
484 variant->gallivm = gallivm_create(module_name, lp->context);
485 if (!variant->gallivm) {
486 FREE(variant);
487 return NULL;
488 }
489
490 variant->shader = shader;
491 variant->list_item_global.base = variant;
492 variant->list_item_local.base = variant;
493 variant->no = shader->variants_created++;
494
495 memcpy(&variant->key, key, shader->variant_key_size);
496
497 if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
498 lp_debug_cs_variant(variant);
499 }
500
501 lp_jit_init_cs_types(variant);
502
503 generate_compute(lp, shader, variant);
504
505 gallivm_compile_module(variant->gallivm);
506
507 variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module);
508
509 variant->jit_function = (lp_jit_cs_func)gallivm_jit_function(variant->gallivm, variant->function);
510
511 gallivm_free_ir(variant->gallivm);
512 return variant;
513 }
514
515 static void
516 lp_cs_ctx_set_cs_variant( struct lp_cs_context *csctx,
517 struct lp_compute_shader_variant *variant)
518 {
519 csctx->cs.current.variant = variant;
520 }
521
522 static void
523 llvmpipe_update_cs(struct llvmpipe_context *lp)
524 {
525 struct lp_compute_shader *shader = lp->cs;
526
527 struct lp_compute_shader_variant_key key;
528 struct lp_compute_shader_variant *variant = NULL;
529 struct lp_cs_variant_list_item *li;
530
531 make_variant_key(lp, shader, &key);
532
533 /* Search the variants for one which matches the key */
534 li = first_elem(&shader->variants);
535 while(!at_end(&shader->variants, li)) {
536 if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) {
537 variant = li->base;
538 break;
539 }
540 li = next_elem(li);
541 }
542
543 if (variant) {
544 /* Move this variant to the head of the list to implement LRU
545 * deletion of shader's when we have too many.
546 */
547 move_to_head(&lp->cs_variants_list, &variant->list_item_global);
548 }
549 else {
550 /* variant not found, create it now */
551 int64_t t0, t1, dt;
552 unsigned i;
553 unsigned variants_to_cull;
554
555 if (LP_DEBUG & DEBUG_CS) {
556 debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n",
557 lp->nr_cs_variants,
558 lp->nr_cs_instrs,
559 lp->nr_cs_variants ? lp->nr_cs_instrs / lp->nr_cs_variants : 0);
560 }
561
562 /* First, check if we've exceeded the max number of shader variants.
563 * If so, free 6.25% of them (the least recently used ones).
564 */
565 variants_to_cull = lp->nr_cs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 16 : 0;
566
567 if (variants_to_cull ||
568 lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
569 if (gallivm_debug & GALLIVM_DEBUG_PERF) {
570 debug_printf("Evicting CS: %u cs variants,\t%u total variants,"
571 "\t%u instrs,\t%u instrs/variant\n",
572 shader->variants_cached,
573 lp->nr_cs_variants, lp->nr_cs_instrs,
574 lp->nr_cs_instrs / lp->nr_cs_variants);
575 }
576
577 /*
578 * We need to re-check lp->nr_cs_variants because an arbitrarliy large
579 * number of shader variants (potentially all of them) could be
580 * pending for destruction on flush.
581 */
582
583 for (i = 0; i < variants_to_cull || lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS; i++) {
584 struct lp_cs_variant_list_item *item;
585 if (is_empty_list(&lp->cs_variants_list)) {
586 break;
587 }
588 item = last_elem(&lp->cs_variants_list);
589 assert(item);
590 assert(item->base);
591 llvmpipe_remove_cs_shader_variant(lp, item->base);
592 }
593 }
594 /*
595 * Generate the new variant.
596 */
597 t0 = os_time_get();
598 variant = generate_variant(lp, shader, &key);
599 t1 = os_time_get();
600 dt = t1 - t0;
601 LP_COUNT_ADD(llvm_compile_time, dt);
602 LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */
603
604 /* Put the new variant into the list */
605 if (variant) {
606 insert_at_head(&shader->variants, &variant->list_item_local);
607 insert_at_head(&lp->cs_variants_list, &variant->list_item_global);
608 lp->nr_cs_variants++;
609 lp->nr_cs_instrs += variant->nr_instrs;
610 shader->variants_cached++;
611 }
612 }
613 /* Bind this variant */
614 lp_cs_ctx_set_cs_variant(lp->csctx, variant);
615 }
616
617 static void
618 llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe)
619 {
620 if (llvmpipe->cs_dirty & (LP_CSNEW_CS))
621 llvmpipe_update_cs(llvmpipe);
622
623 llvmpipe->cs_dirty = 0;
624 }
625
/**
 * launch_grid hook.  At present it only refreshes the derived compute
 * state; \p info is not consulted.
 */
static void
llvmpipe_launch_grid(struct pipe_context *pipe,
                     const struct pipe_grid_info *info)
{
   struct llvmpipe_context *ctx = llvmpipe_context(pipe);

   llvmpipe_cs_update_derived(ctx);
}
633
634 void
635 llvmpipe_init_compute_funcs(struct llvmpipe_context *llvmpipe)
636 {
637 llvmpipe->pipe.create_compute_state = llvmpipe_create_compute_state;
638 llvmpipe->pipe.bind_compute_state = llvmpipe_bind_compute_state;
639 llvmpipe->pipe.delete_compute_state = llvmpipe_delete_compute_state;
640 llvmpipe->pipe.launch_grid = llvmpipe_launch_grid;
641 }
642
/**
 * Free a compute context object previously returned by lp_csctx_create().
 */
void
lp_csctx_destroy(struct lp_cs_context *csctx)
{
   FREE(csctx);
}
648
649 struct lp_cs_context *lp_csctx_create(struct pipe_context *pipe)
650 {
651 struct lp_cs_context *csctx;
652
653 csctx = CALLOC_STRUCT(lp_cs_context);
654 if (!csctx)
655 return NULL;
656
657 csctx->pipe = pipe;
658 return csctx;
659 }