radeonsi: Add support for loading integers from constant memory
[mesa.git] / src / gallium / drivers / radeonsi / radeonsi_shader.c

#include "gallivm/lp_bld_tgsi_action.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_tgsi.h"
#include "radeon_llvm.h"
#include "radeon_llvm_emit.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"

#include "radeonsi_pipe.h"
#include "radeonsi_shader.h"
#include "si_state.h"
#include "sid.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>

/*
static ps_remap_inputs(
	struct tgsi_llvm_context * tl_ctx,
	unsigned tgsi_index,
	unsigned tgsi_chan)
{
	:
}

struct si_input
{
	struct list_head head;
	unsigned tgsi_index;
	unsigned tgsi_chan;
	unsigned order;
};
*/


struct si_shader_context
{
	struct radeon_llvm_context radeon_bld;
	struct r600_context *rctx;
	struct tgsi_parse_context parse;
	struct tgsi_token * tokens;
	struct si_pipe_shader *shader;
	unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
	/* unsigned num_inputs; */
	/* struct list_head inputs; */
	/* unsigned * input_mappings *//* From TGSI to SI hw */
	/* struct tgsi_shader_info info;*/
};

static struct si_shader_context * si_shader_context(
	struct lp_build_tgsi_context * bld_base)
{
	return (struct si_shader_context *)bld_base;
}


#define PERSPECTIVE_BASE 0
#define LINEAR_BASE 9

#define SAMPLE_OFFSET 0
#define CENTER_OFFSET 2
#define CENTROID_OFFSET 4

#define USE_SGPR_MAX_SUFFIX_LEN 5
#define CONST_ADDR_SPACE 2
#define USER_SGPR_ADDR_SPACE 8

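/* Note on the address spaces above: with the SI LLVM backend this code
 * targets, address space 2 is the constant address space.  Address space 8
 * does not name real memory at all; use_sgpr() below uses it to tag loads
 * whose "pointer" is really a user SGPR index, so the backend can lower
 * them to direct SGPR reads.
 */
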
enum sgpr_type {
	SGPR_CONST_PTR_F32,
	SGPR_CONST_PTR_V4I32,
	SGPR_CONST_PTR_V8I32,
	SGPR_I32,
	SGPR_I64
};

/**
 * Build an LLVM IR indexed load using LLVMBuildGEP + LLVMBuildLoad
 *
 * @param offset The offset parameter specifies the number of
 * elements to offset, not the number of bytes or dwords. An element is the
 * type pointed to by the base_ptr parameter (e.g. int is the element type of
 * an int* pointer).
 *
 * When LLVM lowers the load instruction, it will convert the element offset
 * into a dword offset automatically.
 *
 */
static LLVMValueRef build_indexed_load(
	struct gallivm_state * gallivm,
	LLVMValueRef base_ptr,
	LLVMValueRef offset)
{
	LLVMValueRef computed_ptr = LLVMBuildGEP(
		gallivm->builder, base_ptr, &offset, 1, "");

	return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
}
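
/* Usage sketch (this mirrors fetch_constant() below): to read dword
 * `swizzle` of CONST[idx] through a float constant pointer held in
 * SGPRs 0-1:
 *
 *	LLVMValueRef ptr = use_sgpr(gallivm, SGPR_CONST_PTR_F32, 0);
 *	LLVMValueRef off = lp_build_const_int32(gallivm, idx * 4 + swizzle);
 *	LLVMValueRef val = build_indexed_load(gallivm, ptr, off);
 */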

/**
 * Load a value stored in one of the user SGPRs
 *
 * @param sgpr This is the sgpr to load the value from. If you need to load a
 * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer),
 * then you should pass the index of the first SGPR that holds the value. For
 * example, if you want to load a pointer that is stored in SGPRs 2 and 3, then
 * pass 2 for the sgpr parameter.
 *
 * The value of the sgpr parameter must also be aligned to the width of the
 * type being loaded: the sgpr parameter must be divisible by the dword width
 * of the type. For example, if the value being loaded is two dwords wide, then
 * the sgpr parameter must be divisible by two.
 */
static LLVMValueRef use_sgpr(
	struct gallivm_state * gallivm,
	enum sgpr_type type,
	unsigned sgpr)
{
	LLVMValueRef sgpr_index;
	LLVMTypeRef ret_type;
	LLVMValueRef ptr;

	sgpr_index = lp_build_const_int32(gallivm, sgpr);

	switch (type) {
	case SGPR_CONST_PTR_F32:
		assert(sgpr % 2 == 0);
		ret_type = LLVMFloatTypeInContext(gallivm->context);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	case SGPR_I32:
		ret_type = LLVMInt32TypeInContext(gallivm->context);
		break;

	case SGPR_I64:
		assert(sgpr % 2 == 0);
		ret_type = LLVMInt64TypeInContext(gallivm->context);
		break;

	case SGPR_CONST_PTR_V4I32:
		assert(sgpr % 2 == 0);
		ret_type = LLVMInt32TypeInContext(gallivm->context);
		ret_type = LLVMVectorType(ret_type, 4);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	case SGPR_CONST_PTR_V8I32:
		assert(sgpr % 2 == 0);
		ret_type = LLVMInt32TypeInContext(gallivm->context);
		ret_type = LLVMVectorType(ret_type, 8);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	default:
		assert(!"Unsupported SGPR type in use_sgpr()");
		return NULL;
	}

	ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE);
	ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, "");
	return LLVMBuildLoad(gallivm->builder, ptr, "");
}
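
/* For example, use_sgpr(gallivm, SGPR_I64, 2) emits an inttoptr of the
 * constant 2 into the USER_SGPR address space followed by a load, which the
 * backend is expected to turn into a read of the 64-bit value held in the
 * SGPR pair [2:3].
 */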

static void declare_input_vs(
	struct si_shader_context * si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	LLVMValueRef t_list_ptr;
	LLVMValueRef t_offset;
	LLVMValueRef t_list;
	LLVMValueRef attribute_offset;
	LLVMValueRef buffer_index_reg;
	LLVMValueRef args[3];
	LLVMTypeRef vec4_type;
	LLVMValueRef input;
	struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
	struct r600_context *rctx = si_shader_ctx->rctx;
	struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];
	unsigned chan;

	/* Load the T list */
	/* XXX: Communicate with the rest of the driver about which SGPR the T#
	 * list pointer is going to be stored in.  Hard code to SGPR[6:7] for
	 * now. */
	t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 6);

	t_offset = lp_build_const_int32(base->gallivm, velem->vertex_buffer_index);

	t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset);

	/* Build the attribute offset */
	attribute_offset = lp_build_const_int32(base->gallivm, velem->src_offset);

	/* Load the buffer index, which is always stored in VGPR0
	 * for vertex shaders */
	buffer_index_reg = lp_build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0);

	vec4_type = LLVMVectorType(base->elem_type, 4);
	args[0] = t_list;
	args[1] = attribute_offset;
	args[2] = buffer_index_reg;
	input = lp_build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.input", vec4_type, args, 3);

	/* Break up the vec4 into individual components */
	for (chan = 0; chan < 4; chan++) {
		LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
		/* XXX: Use a helper function for this.  There is one in
		 * tgsi_llvm.c. */
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
				LLVMBuildExtractElement(base->gallivm->builder,
				input, llvm_chan, "");
	}
}

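/* Fragment shader inputs, unlike vertex inputs, are not fetched from memory:
 * each scalar input channel below is produced by one llvm.SI.fs.interp.*
 * intrinsic call taking (channel, attribute index, params), where params
 * packs the primitive mask and parameter offset described in the comment
 * below.  Note that flat ("constant") interpolation currently falls back to
 * linear-center interpolation because llvm.SI.fs.interp.constant hangs the
 * GPU (see the XXX comments in the switch).
 */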
static void declare_input_fs(
	struct si_shader_context * si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	const char * intr_name;
	unsigned chan;
	struct lp_build_context * base =
			&si_shader_ctx->radeon_bld.soa.bld_base.base;
	struct gallivm_state * gallivm = base->gallivm;

	/* This value is:
	 * [15:0] NewPrimMask (Bit mask for each quad.  It is set if the
	 *                     quad begins a new primitive.  Bit 0 always needs
	 *                     to be unset)
	 * [31:16] ParamOffset
	 *
	 */
	/* XXX: This register number must be identical to the S_00B02C_USER_SGPR
	 * register field value
	 */
	LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, 6);

	/* XXX: Is this the input_index? */
	LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index);

	/* XXX: Handle all possible interpolation modes */
	switch (decl->Interp.Interpolate) {
	case TGSI_INTERPOLATE_COLOR:
		/* XXX: Flat shading hangs the GPU */
		if (si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
#if 0
			intr_name = "llvm.SI.fs.interp.constant";
#else
			intr_name = "llvm.SI.fs.interp.linear.center";
#endif
		} else {
			if (decl->Interp.Centroid)
				intr_name = "llvm.SI.fs.interp.persp.centroid";
			else
				intr_name = "llvm.SI.fs.interp.persp.center";
		}
		break;
	case TGSI_INTERPOLATE_CONSTANT:
		/* XXX: Flat shading hangs the GPU */
#if 0
		intr_name = "llvm.SI.fs.interp.constant";
		break;
#endif
	case TGSI_INTERPOLATE_LINEAR:
		if (decl->Interp.Centroid)
			intr_name = "llvm.SI.fs.interp.linear.centroid";
		else
			intr_name = "llvm.SI.fs.interp.linear.center";
		break;
	case TGSI_INTERPOLATE_PERSPECTIVE:
		if (decl->Interp.Centroid)
			intr_name = "llvm.SI.fs.interp.persp.centroid";
		else
			intr_name = "llvm.SI.fs.interp.persp.center";
		break;
	default:
		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
		return;
	}

	/* XXX: Could there be more than TGSI_NUM_CHANNELS (4)? */
	for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
		LLVMValueRef args[3];
		LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
		unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
		LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = params;
		si_shader_ctx->radeon_bld.inputs[soa_index] =
			lp_build_intrinsic(gallivm->builder, intr_name,
					   input_type, args, 3);
	}
}

static void declare_input(
	struct radeon_llvm_context * radeon_bld,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	struct si_shader_context * si_shader_ctx =
				si_shader_context(&radeon_bld->soa.bld_base);
	if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
		declare_input_vs(si_shader_ctx, input_index, decl);
	} else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
		declare_input_fs(si_shader_ctx, input_index, decl);
	} else {
		fprintf(stderr, "Warning: Unsupported shader type.\n");
	}
}

static LLVMValueRef fetch_constant(
	struct lp_build_tgsi_context * bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	struct lp_build_context * base = &bld_base->base;

	LLVMValueRef const_ptr;
	LLVMValueRef offset;
	LLVMValueRef load;

	/* XXX: Assume the pointer to the constant buffer is being stored in
	 * SGPR[0:1] */
	const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, 0);

	/* XXX: This assumes that the constant buffer is not packed, so
	 * CONST[0].x will have an offset of 0 and CONST[1].x will have an
	 * offset of 4. */
	offset = lp_build_const_int32(base->gallivm,
			(reg->Register.Index * 4) + swizzle);

	load = build_indexed_load(base->gallivm, const_ptr, offset);
	return bitcast(bld_base, type, load);
}
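
/* Worked example: fetching CONST[2].y computes an element offset of
 * 2 * 4 + 1 = 9, i.e. the tenth float in the buffer.  The final bitcast is
 * what lets integer constants come back with the right type: the load is
 * always done through a float pointer and then reinterpreted according to
 * the TGSI opcode's expected operand type.
 */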

/* XXX: This is partially implemented for VS only at this point.  It is not complete */
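/* Export-intrinsic argument layout, for reference (it matches the args[]
 * assignments below): args[0] = component enable mask, args[1] = whether the
 * EXEC mask represents the valid mask, args[2] = "done" (last export) flag,
 * args[3] = export target, args[4] = COMPR flag, args[5..8] = the four
 * channel values.  The final position export (VS) or color export (PS) is
 * held back in last_args so it can be re-emitted at the end with the done
 * bit set and, for fragment shaders, the valid-mask flag set.
 */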
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
	struct si_shader * shader = &si_shader_ctx->shader->shader;
	struct lp_build_context * base = &bld_base->base;
	struct lp_build_context * uint =
				&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct tgsi_parse_context *parse = &si_shader_ctx->parse;
	LLVMValueRef last_args[9] = { 0 };

	while (!tgsi_parse_end_of_tokens(parse)) {
		/* XXX: component_bits controls which components of the output
		 * registers actually get exported. (e.g. bit 0 means export
		 * X component, bit 1 means export Y component, etc.)  I'm
		 * hard coding this to 0xf for now.  In the future, we might
		 * want to do something else. */
		unsigned component_bits = 0xf;
		unsigned chan;
		struct tgsi_full_declaration *d =
					&parse->FullToken.FullDeclaration;
		LLVMValueRef args[9];
		unsigned target;
		unsigned index;
		unsigned color_count = 0;
		unsigned param_count = 0;
		int i;

		tgsi_parse_token(parse);
		if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
			continue;

		switch (d->Declaration.File) {
		case TGSI_FILE_INPUT:
			i = shader->ninput++;
			shader->input[i].name = d->Semantic.Name;
			shader->input[i].sid = d->Semantic.Index;
			shader->input[i].interpolate = d->Interp.Interpolate;
			shader->input[i].centroid = d->Interp.Centroid;
			break;
		case TGSI_FILE_OUTPUT:
			i = shader->noutput++;
			shader->output[i].name = d->Semantic.Name;
			shader->output[i].sid = d->Semantic.Index;
			shader->output[i].interpolate = d->Interp.Interpolate;
			break;
		}

		if (d->Declaration.File != TGSI_FILE_OUTPUT)
			continue;

		for (index = d->Range.First; index <= d->Range.Last; index++) {
			for (chan = 0; chan < 4; chan++) {
				LLVMValueRef out_ptr =
					si_shader_ctx->radeon_bld.soa.outputs
					[index][chan];
				/* +5 because the first output value will be
				 * the 6th argument to the intrinsic. */
				args[chan + 5] = LLVMBuildLoad(
					base->gallivm->builder, out_ptr, "");
			}

			/* XXX: We probably need to keep track of the output
			 * values, so we know what we are passing to the next
			 * stage. */

			/* Select the correct target */
			switch (d->Semantic.Name) {
			case TGSI_SEMANTIC_POSITION:
				target = V_008DFC_SQ_EXP_POS;
				break;
			case TGSI_SEMANTIC_COLOR:
				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
					target = V_008DFC_SQ_EXP_PARAM + param_count;
					shader->output[i].param_offset = param_count;
					param_count++;
				} else {
					target = V_008DFC_SQ_EXP_MRT + color_count;
					color_count++;
				}
				break;
			case TGSI_SEMANTIC_GENERIC:
				target = V_008DFC_SQ_EXP_PARAM + param_count;
				shader->output[i].param_offset = param_count;
				param_count++;
				break;
			default:
				target = 0;
				fprintf(stderr,
					"Warning: SI unhandled output type:%d\n",
					d->Semantic.Name);
			}

			/* Specify which components to enable */
			args[0] = lp_build_const_int32(base->gallivm,
						       component_bits);

			/* Specify whether the EXEC mask represents the valid mask */
			args[1] = lp_build_const_int32(base->gallivm, 0);

			/* Specify whether this is the last export */
			args[2] = lp_build_const_int32(base->gallivm, 0);

			/* Specify the target we are exporting */
			args[3] = lp_build_const_int32(base->gallivm, target);

			/* Set COMPR flag to zero to export data as 32-bit */
			args[4] = uint->zero;

			if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
			    (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
			    (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
				if (last_args[0]) {
					lp_build_intrinsic(base->gallivm->builder,
						"llvm.SI.export",
						LLVMVoidTypeInContext(base->gallivm->context),
						last_args, 9);
				}

				memcpy(last_args, args, sizeof(args));
			} else {
				lp_build_intrinsic(base->gallivm->builder,
					"llvm.SI.export",
					LLVMVoidTypeInContext(base->gallivm->context),
					args, 9);
			}

		}
	}

	/* Specify whether the EXEC mask represents the valid mask */
	last_args[1] = lp_build_const_int32(base->gallivm,
			si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

	/* Specify that this is the last export */
	last_args[2] = lp_build_const_int32(base->gallivm, 1);

	lp_build_intrinsic(base->gallivm->builder,
			   "llvm.SI.export",
			   LLVMVoidTypeInContext(base->gallivm->context),
			   last_args, 9);

	/* XXX: Look up what this function does */
/*	ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/
}

static void tex_fetch_args(
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	LLVMValueRef ptr;
	LLVMValueRef offset;

	/* WriteMask */
	emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm,
				emit_data->inst->Dst[0].Register.WriteMask);

	/* Coordinates */
	/* XXX: Not all sample instructions need 4 address arguments. */
	emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
						 0, LP_CHAN_ALL);

	/* Resource */
	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 4);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				8 * emit_data->inst->Src[1].Register.Index);
	emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
						ptr, offset);

	/* Sampler */
	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 2);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				4 * emit_data->inst->Src[1].Register.Index);
	emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
						ptr, offset);

	/* Dimensions */
	/* XXX: We might want to pass this information to the shader at some
	 * point. */
/*	emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
				emit_data->inst->Texture.Texture);
*/

	emit_data->arg_count = 4;
	/* XXX: To optimize, we could use a float or v2f32, if the last bits of
	 * the writemask are clear */
	emit_data->dst_type = LLVMVectorType(
			LLVMFloatTypeInContext(bld_base->base.gallivm->context),
			4);
}
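
/* The resulting llvm.SI.sample call thus takes four operands: the
 * destination writemask, a vec4 of coordinates, the texture resource
 * descriptor (a v8i32 T# loaded through the pointer in SGPR[4:5]) and the
 * sampler state (a v4i32 S# loaded through the pointer in SGPR[2:3]), and
 * it returns a v4f32.
 */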

static const struct lp_build_tgsi_action tex_action = {
	.fetch_args = tex_fetch_args,
	.emit = lp_build_tgsi_intrinsic,
	.intr_name = "llvm.SI.sample"
};

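/* Compile a TGSI shader into SI machine code and upload it: scan the tokens,
 * translate them to LLVM IR via gallivm/radeon_llvm, hand the module to
 * radeon_llvm_compile(), then copy the resulting bytes into a buffer object.
 */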
int si_pipe_shader_create(
	struct pipe_context *ctx,
	struct si_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct si_shader_context si_shader_ctx;
	struct tgsi_shader_info shader_info;
	struct lp_build_tgsi_context * bld_base;
	LLVMModuleRef mod;
	unsigned char * inst_bytes;
	unsigned inst_byte_count;
	unsigned i;
	bool dump;

	dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE);

	radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;

	tgsi_scan_shader(shader->tokens, &shader_info);
	bld_base->info = &shader_info;
	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
	bld_base->emit_epilogue = si_llvm_emit_epilogue;

	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;

	si_shader_ctx.radeon_bld.load_input = declare_input;
	si_shader_ctx.tokens = shader->tokens;
	tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
	si_shader_ctx.shader = shader;
	si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
	si_shader_ctx.rctx = rctx;

	shader->shader.nr_cbufs = rctx->framebuffer.nr_cbufs;

	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
	 * conversion fails. */
	if (dump) {
		tgsi_dump(shader->tokens, 0);
	}

	lp_build_tgsi_llvm(bld_base, shader->tokens);

	radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);

	mod = bld_base->base.gallivm->module;
	if (dump) {
		LLVMDumpModule(mod);
	}
	radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", dump);
	if (dump) {
		fprintf(stderr, "SI CODE:\n");
		for (i = 0; i < inst_byte_count; i += 4) {
			fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
				inst_bytes[i + 2], inst_bytes[i + 1],
				inst_bytes[i]);
		}
	}

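	/* The compiled blob starts with a 12-byte header: three little-endian
	 * dwords holding the SGPR count, the VGPR count and the
	 * SPI_PS_INPUT_ENA value.  The actual shader code follows at byte 12,
	 * which is why the copy below skips the first 12 bytes.
	 */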
	shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
	shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
	shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));

	tgsi_parse_free(&si_shader_ctx.parse);

	/* copy new shader */
	if (shader->bo == NULL) {
		uint32_t *ptr;

		shader->bo = (struct r600_resource*)
			pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, inst_byte_count);
		if (shader->bo == NULL) {
			return -ENOMEM;
		}
		ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
		if (0 /*R600_BIG_ENDIAN*/) {
			for (i = 0; i < (inst_byte_count - 12) / 4; ++i) {
				ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes + 12 + i*4));
			}
		} else {
			memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
		}
		rctx->ws->buffer_unmap(shader->bo->cs_buf);
	}

	free(inst_bytes);

	return 0;
}

void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);

	memset(&shader->shader, 0, sizeof(struct si_shader));
}