2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "ac_binary.h"
26 #include "util/u_math.h"
27 #include "util/u_memory.h"
/* Key values for spill-count entries.
 * NOTE(review): presumably these are matched against the register id read in
 * ac_parse_shader_binary_config to fill conf->spilled_sgprs and
 * conf->spilled_vgprs; the matching case labels are not visible here, so
 * confirm against the full switch statement. */
35 #define SPILLED_SGPRS 0x4
36 #define SPILLED_VGPRS 0x8
38 /* Parse configuration data in .AMDGPU.config section format. */
39 void ac_parse_shader_binary_config(const char *data
, size_t nbytes
,
41 bool really_needs_scratch
,
42 struct ac_shader_config
*conf
)
44 uint32_t scratch_size
= 0;
46 for (size_t i
= 0; i
< nbytes
; i
+= 8) {
47 unsigned reg
= util_le32_to_cpu(*(uint32_t*)(data
+ i
));
48 unsigned value
= util_le32_to_cpu(*(uint32_t*)(data
+ i
+ 4));
50 case R_00B028_SPI_SHADER_PGM_RSRC1_PS
:
51 case R_00B128_SPI_SHADER_PGM_RSRC1_VS
:
52 case R_00B228_SPI_SHADER_PGM_RSRC1_GS
:
53 case R_00B848_COMPUTE_PGM_RSRC1
:
54 case R_00B428_SPI_SHADER_PGM_RSRC1_HS
:
56 conf
->num_vgprs
= MAX2(conf
->num_vgprs
, (G_00B028_VGPRS(value
) + 1) * 8);
58 conf
->num_vgprs
= MAX2(conf
->num_vgprs
, (G_00B028_VGPRS(value
) + 1) * 4);
60 conf
->num_sgprs
= MAX2(conf
->num_sgprs
, (G_00B028_SGPRS(value
) + 1) * 8);
61 conf
->float_mode
= G_00B028_FLOAT_MODE(value
);
64 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS
:
65 conf
->lds_size
= MAX2(conf
->lds_size
, G_00B02C_EXTRA_LDS_SIZE(value
));
66 conf
->num_shared_vgprs
= G_00B02C_SHARED_VGPR_CNT(value
);
69 case R_00B12C_SPI_SHADER_PGM_RSRC2_VS
:
70 conf
->num_shared_vgprs
= G_00B12C_SHARED_VGPR_CNT(value
);
73 case R_00B22C_SPI_SHADER_PGM_RSRC2_GS
:
74 conf
->num_shared_vgprs
= G_00B22C_SHARED_VGPR_CNT(value
);
77 case R_00B42C_SPI_SHADER_PGM_RSRC2_HS
:
78 conf
->num_shared_vgprs
= G_00B42C_SHARED_VGPR_CNT(value
);
81 case R_00B84C_COMPUTE_PGM_RSRC2
:
82 conf
->lds_size
= MAX2(conf
->lds_size
, G_00B84C_LDS_SIZE(value
));
85 case R_00B8A0_COMPUTE_PGM_RSRC3
:
86 conf
->num_shared_vgprs
= G_00B8A0_SHARED_VGPR_CNT(value
);
89 case R_0286CC_SPI_PS_INPUT_ENA
:
90 conf
->spi_ps_input_ena
= value
;
92 case R_0286D0_SPI_PS_INPUT_ADDR
:
93 conf
->spi_ps_input_addr
= value
;
95 case R_0286E8_SPI_TMPRING_SIZE
:
96 case R_00B860_COMPUTE_TMPRING_SIZE
:
97 /* WAVESIZE is in units of 256 dwords. */
101 conf
->spilled_sgprs
= value
;
104 conf
->spilled_vgprs
= value
;
111 fprintf(stderr
, "Warning: LLVM emitted unknown "
112 "config register: 0x%x\n", reg
);
120 if (!conf
->spi_ps_input_addr
)
121 conf
->spi_ps_input_addr
= conf
->spi_ps_input_ena
;
123 if (really_needs_scratch
) {
124 /* sgprs spills aren't spilling */
125 conf
->scratch_bytes_per_wave
= G_00B860_WAVESIZE(scratch_size
) * 256 * 4;