From 92d7ca4b1cdfe1ffc80748fa7eedf927f3c664f0 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Tue, 12 Mar 2019 13:49:26 -0600 Subject: [PATCH] gallium: add lima driver MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit v2: - use renamed util_dynarray_grow_cap - use DEBUG_GET_ONCE_FLAGS_OPTION for debug flags - remove DRM_FORMAT_MOD_ARM_AGTB_MODE0 usage - compute min/max index in driver v3: - fix plbu framebuffer state calculation - fix color_16pc assemble - use nir_lower_all_source_mods for lowering neg/abs/sat - use float arrary for static GPU data - add disassemble comment for static shader code - use drm_find_modifier v4: - use lima_nir_lower_uniform_to_scalar v5: - remove nir_opt_global_to_local when rebase Cc: Rob Clark Cc: Alyssa Rosenzweig Acked-by: Eric Anholt Signed-off-by: Andreas Baierl Signed-off-by: Arno Messiaen Signed-off-by: Connor Abbott Signed-off-by: Erico Nunes Signed-off-by: Heiko Stuebner Signed-off-by: Koen Kooi Signed-off-by: Marek Vasut Signed-off-by: marmeladema Signed-off-by: Paweł Chmiel Signed-off-by: Rob Herring Signed-off-by: Rohan Garg Signed-off-by: Vasily Khoruzhick Signed-off-by: Qiang Yu --- meson.build | 3 +- meson_options.txt | 2 +- .../auxiliary/pipe-loader/pipe_loader_drm.c | 5 + .../auxiliary/target-helpers/drm_helper.h | 23 + .../target-helpers/drm_helper_public.h | 3 + src/gallium/drivers/lima/ir/gp/codegen.c | 619 +++++++ src/gallium/drivers/lima/ir/gp/codegen.h | 166 ++ src/gallium/drivers/lima/ir/gp/disasm.c | 568 ++++++ src/gallium/drivers/lima/ir/gp/gpir.h | 392 ++++ src/gallium/drivers/lima/ir/gp/instr.c | 488 +++++ src/gallium/drivers/lima/ir/gp/lower.c | 529 ++++++ src/gallium/drivers/lima/ir/gp/nir.c | 422 +++++ src/gallium/drivers/lima/ir/gp/node.c | 492 +++++ .../drivers/lima/ir/gp/physical_regalloc.c | 135 ++ .../drivers/lima/ir/gp/reduce_scheduler.c | 220 +++ src/gallium/drivers/lima/ir/gp/scheduler.c | 809 ++++++++ .../drivers/lima/ir/gp/value_regalloc.c | 170 ++ src/gallium/drivers/lima/ir/lima_ir.h | 66 + .../ir/lima_nir_lower_uniform_to_scalar.c | 83 + src/gallium/drivers/lima/ir/pp/codegen.c | 669 +++++++ src/gallium/drivers/lima/ir/pp/codegen.h | 359 ++++ src/gallium/drivers/lima/ir/pp/disasm.c | 776 ++++++++ src/gallium/drivers/lima/ir/pp/instr.c | 311 ++++ src/gallium/drivers/lima/ir/pp/lower.c | 421 +++++ src/gallium/drivers/lima/ir/pp/nir.c | 494 +++++ src/gallium/drivers/lima/ir/pp/node.c | 426 +++++ .../drivers/lima/ir/pp/node_to_instr.c | 401 ++++ src/gallium/drivers/lima/ir/pp/ppir.h | 512 +++++ src/gallium/drivers/lima/ir/pp/regalloc.c | 757 ++++++++ src/gallium/drivers/lima/ir/pp/scheduler.c | 197 ++ src/gallium/drivers/lima/lima_bo.c | 337 ++++ src/gallium/drivers/lima/lima_bo.h | 66 + src/gallium/drivers/lima/lima_context.c | 262 +++ src/gallium/drivers/lima/lima_context.h | 294 +++ src/gallium/drivers/lima/lima_draw.c | 1648 +++++++++++++++++ src/gallium/drivers/lima/lima_fence.c | 120 ++ src/gallium/drivers/lima/lima_fence.h | 36 + src/gallium/drivers/lima/lima_program.c | 317 ++++ src/gallium/drivers/lima/lima_program.h | 35 + src/gallium/drivers/lima/lima_query.c | 96 + src/gallium/drivers/lima/lima_resource.c | 589 ++++++ src/gallium/drivers/lima/lima_resource.h | 86 + src/gallium/drivers/lima/lima_screen.c | 546 ++++++ src/gallium/drivers/lima/lima_screen.h | 93 + src/gallium/drivers/lima/lima_state.c | 506 +++++ src/gallium/drivers/lima/lima_submit.c | 184 ++ src/gallium/drivers/lima/lima_submit.h | 43 + src/gallium/drivers/lima/lima_texture.c | 278 +++ 
src/gallium/drivers/lima/lima_texture.h | 35 + src/gallium/drivers/lima/lima_tiling.c | 184 ++ src/gallium/drivers/lima/lima_tiling.h | 44 + src/gallium/drivers/lima/lima_util.c | 80 + src/gallium/drivers/lima/lima_util.h | 37 + src/gallium/drivers/lima/meson.build | 89 + src/gallium/meson.build | 6 + src/gallium/targets/dri/meson.build | 5 +- src/gallium/targets/dri/target.c | 3 + src/gallium/winsys/lima/drm/lima_drm_public.h | 35 + src/gallium/winsys/lima/drm/lima_drm_winsys.c | 124 ++ src/gallium/winsys/lima/drm/meson.build | 29 + 60 files changed, 16721 insertions(+), 4 deletions(-) create mode 100644 src/gallium/drivers/lima/ir/gp/codegen.c create mode 100644 src/gallium/drivers/lima/ir/gp/codegen.h create mode 100644 src/gallium/drivers/lima/ir/gp/disasm.c create mode 100644 src/gallium/drivers/lima/ir/gp/gpir.h create mode 100644 src/gallium/drivers/lima/ir/gp/instr.c create mode 100644 src/gallium/drivers/lima/ir/gp/lower.c create mode 100644 src/gallium/drivers/lima/ir/gp/nir.c create mode 100644 src/gallium/drivers/lima/ir/gp/node.c create mode 100644 src/gallium/drivers/lima/ir/gp/physical_regalloc.c create mode 100644 src/gallium/drivers/lima/ir/gp/reduce_scheduler.c create mode 100644 src/gallium/drivers/lima/ir/gp/scheduler.c create mode 100644 src/gallium/drivers/lima/ir/gp/value_regalloc.c create mode 100644 src/gallium/drivers/lima/ir/lima_ir.h create mode 100644 src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c create mode 100644 src/gallium/drivers/lima/ir/pp/codegen.c create mode 100644 src/gallium/drivers/lima/ir/pp/codegen.h create mode 100644 src/gallium/drivers/lima/ir/pp/disasm.c create mode 100644 src/gallium/drivers/lima/ir/pp/instr.c create mode 100644 src/gallium/drivers/lima/ir/pp/lower.c create mode 100644 src/gallium/drivers/lima/ir/pp/nir.c create mode 100644 src/gallium/drivers/lima/ir/pp/node.c create mode 100644 src/gallium/drivers/lima/ir/pp/node_to_instr.c create mode 100644 src/gallium/drivers/lima/ir/pp/ppir.h create mode 100644 src/gallium/drivers/lima/ir/pp/regalloc.c create mode 100644 src/gallium/drivers/lima/ir/pp/scheduler.c create mode 100644 src/gallium/drivers/lima/lima_bo.c create mode 100644 src/gallium/drivers/lima/lima_bo.h create mode 100644 src/gallium/drivers/lima/lima_context.c create mode 100644 src/gallium/drivers/lima/lima_context.h create mode 100644 src/gallium/drivers/lima/lima_draw.c create mode 100644 src/gallium/drivers/lima/lima_fence.c create mode 100644 src/gallium/drivers/lima/lima_fence.h create mode 100644 src/gallium/drivers/lima/lima_program.c create mode 100644 src/gallium/drivers/lima/lima_program.h create mode 100644 src/gallium/drivers/lima/lima_query.c create mode 100644 src/gallium/drivers/lima/lima_resource.c create mode 100644 src/gallium/drivers/lima/lima_resource.h create mode 100644 src/gallium/drivers/lima/lima_screen.c create mode 100644 src/gallium/drivers/lima/lima_screen.h create mode 100644 src/gallium/drivers/lima/lima_state.c create mode 100644 src/gallium/drivers/lima/lima_submit.c create mode 100644 src/gallium/drivers/lima/lima_submit.h create mode 100644 src/gallium/drivers/lima/lima_texture.c create mode 100644 src/gallium/drivers/lima/lima_texture.h create mode 100644 src/gallium/drivers/lima/lima_tiling.c create mode 100644 src/gallium/drivers/lima/lima_tiling.h create mode 100644 src/gallium/drivers/lima/lima_util.c create mode 100644 src/gallium/drivers/lima/lima_util.h create mode 100644 src/gallium/drivers/lima/meson.build create mode 100644 
src/gallium/winsys/lima/drm/lima_drm_public.h create mode 100644 src/gallium/winsys/lima/drm/lima_drm_winsys.c create mode 100644 src/gallium/winsys/lima/drm/meson.build diff --git a/meson.build b/meson.build index 2c98e9e18a9..ae52ac137c0 100644 --- a/meson.build +++ b/meson.build @@ -132,7 +132,7 @@ if _drivers.contains('auto') elif ['arm', 'aarch64'].contains(host_machine.cpu_family()) _drivers = [ 'kmsro', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'nouveau', - 'tegra', 'virgl', 'swrast' + 'tegra', 'virgl', 'lima', 'swrast' ] else error('Unknown architecture @0@. Please pass -Dgallium-drivers to set driver options. Patches gladly accepted to fix this.'.format( @@ -162,6 +162,7 @@ with_gallium_i915 = _drivers.contains('i915') with_gallium_svga = _drivers.contains('svga') with_gallium_virgl = _drivers.contains('virgl') with_gallium_swr = _drivers.contains('swr') +with_gallium_lima = _drivers.contains('lima') if cc.get_id() == 'intel' if meson.version().version_compare('< 0.49.0') diff --git a/meson_options.txt b/meson_options.txt index 5c063054ca1..81e4a8906a9 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -60,7 +60,7 @@ option( choices : [ '', 'auto', 'kmsro', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno', 'swrast', 'v3d', 'vc4', 'etnaviv', 'tegra', 'i915', 'svga', 'virgl', - 'swr', 'panfrost', 'iris' + 'swr', 'panfrost', 'iris', 'lima' ], description : 'List of gallium drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built' ) diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c index 17d4a58e393..a9028394d4d 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c @@ -141,6 +141,11 @@ static const struct drm_driver_descriptor driver_descriptors[] = { .create_screen = pipe_tegra_create_screen, .configuration = pipe_default_configuration_query, }, + { + .driver_name = "lima", + .create_screen = pipe_lima_create_screen, + .configuration = pipe_default_configuration_query, + }, }; static const struct drm_driver_descriptor default_driver_descriptor = { diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h index 57ca74dfcb0..3280312729e 100644 --- a/src/gallium/auxiliary/target-helpers/drm_helper.h +++ b/src/gallium/auxiliary/target-helpers/drm_helper.h @@ -425,4 +425,27 @@ pipe_tegra_create_screen(int fd, const struct pipe_screen_config *config) #endif +#ifdef GALLIUM_LIMA +#include "lima/drm/lima_drm_public.h" + +struct pipe_screen * +pipe_lima_create_screen(int fd, const struct pipe_screen_config *config) +{ + struct pipe_screen *screen; + + screen = lima_drm_screen_create(fd); + return screen ? 
debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_lima_create_screen(int fd, const struct pipe_screen_config *config) +{ + fprintf(stderr, "lima: driver missing\n"); + return NULL; +} + +#endif + #endif /* DRM_HELPER_H */ diff --git a/src/gallium/auxiliary/target-helpers/drm_helper_public.h b/src/gallium/auxiliary/target-helpers/drm_helper_public.h index 1f36ccb6945..2cf05eb4577 100644 --- a/src/gallium/auxiliary/target-helpers/drm_helper_public.h +++ b/src/gallium/auxiliary/target-helpers/drm_helper_public.h @@ -57,6 +57,9 @@ pipe_imx_drm_create_screen(int fd, const struct pipe_screen_config *config); struct pipe_screen * pipe_tegra_create_screen(int fd, const struct pipe_screen_config *config); +struct pipe_screen * +pipe_lima_create_screen(int fd, const struct pipe_screen_config *config); + const struct drm_conf_ret * pipe_default_configuration_query(enum drm_conf conf); diff --git a/src/gallium/drivers/lima/ir/gp/codegen.c b/src/gallium/drivers/lima/ir/gp/codegen.c new file mode 100644 index 00000000000..798cf8ec88e --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/codegen.c @@ -0,0 +1,619 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "util/ralloc.h" + +#include "gpir.h" +#include "codegen.h" +#include "lima_context.h" + +static gpir_codegen_src gpir_get_alu_input(gpir_node *parent, gpir_node *child) +{ + static const int slot_to_src[GPIR_INSTR_SLOT_NUM][3] = { + [GPIR_INSTR_SLOT_MUL0] = { + gpir_codegen_src_unused, gpir_codegen_src_p1_mul_0, gpir_codegen_src_p2_mul_0 }, + [GPIR_INSTR_SLOT_MUL1] = { + gpir_codegen_src_unused, gpir_codegen_src_p1_mul_1, gpir_codegen_src_p2_mul_1 }, + + [GPIR_INSTR_SLOT_ADD0] = { + gpir_codegen_src_unused, gpir_codegen_src_p1_acc_0, gpir_codegen_src_p2_acc_0 }, + [GPIR_INSTR_SLOT_ADD1] = { + gpir_codegen_src_unused, gpir_codegen_src_p1_acc_1, gpir_codegen_src_p2_acc_1 }, + + [GPIR_INSTR_SLOT_COMPLEX] = { + gpir_codegen_src_unused, gpir_codegen_src_p1_complex, gpir_codegen_src_unused }, + [GPIR_INSTR_SLOT_PASS] = { + gpir_codegen_src_unused, gpir_codegen_src_p1_pass, gpir_codegen_src_p2_pass }, + [GPIR_INSTR_SLOT_BRANCH] = { + gpir_codegen_src_unused, gpir_codegen_src_unused, gpir_codegen_src_unused }, + + [GPIR_INSTR_SLOT_REG0_LOAD0] = { + gpir_codegen_src_attrib_x, gpir_codegen_src_p1_attrib_x, gpir_codegen_src_unused }, + [GPIR_INSTR_SLOT_REG0_LOAD1] = { + gpir_codegen_src_attrib_y, gpir_codegen_src_p1_attrib_y, gpir_codegen_src_unused }, + [GPIR_INSTR_SLOT_REG0_LOAD2] = { + gpir_codegen_src_attrib_z, gpir_codegen_src_p1_attrib_z, gpir_codegen_src_unused }, + [GPIR_INSTR_SLOT_REG0_LOAD3] = { + gpir_codegen_src_attrib_w, gpir_codegen_src_p1_attrib_w, gpir_codegen_src_unused }, + + [GPIR_INSTR_SLOT_REG1_LOAD0] = { + gpir_codegen_src_register_x, gpir_codegen_src_unused, gpir_codegen_src_unused}, + [GPIR_INSTR_SLOT_REG1_LOAD1] = { + gpir_codegen_src_register_y, gpir_codegen_src_unused, gpir_codegen_src_unused}, + [GPIR_INSTR_SLOT_REG1_LOAD2] = { + gpir_codegen_src_register_z, gpir_codegen_src_unused, gpir_codegen_src_unused}, + [GPIR_INSTR_SLOT_REG1_LOAD3] = { + gpir_codegen_src_register_w, gpir_codegen_src_unused, gpir_codegen_src_unused}, + + [GPIR_INSTR_SLOT_MEM_LOAD0] = { + gpir_codegen_src_load_x, gpir_codegen_src_unused, gpir_codegen_src_unused }, + [GPIR_INSTR_SLOT_MEM_LOAD1] = { + gpir_codegen_src_load_y, gpir_codegen_src_unused, gpir_codegen_src_unused }, + [GPIR_INSTR_SLOT_MEM_LOAD2] = { + gpir_codegen_src_load_z, gpir_codegen_src_unused, gpir_codegen_src_unused }, + [GPIR_INSTR_SLOT_MEM_LOAD3] = { + gpir_codegen_src_load_w, gpir_codegen_src_unused, gpir_codegen_src_unused }, + }; + + assert(child->sched.instr - parent->sched.instr < 3); + + return slot_to_src[child->sched.pos][child->sched.instr - parent->sched.instr]; +} + +static void gpir_codegen_mul0_slot(gpir_codegen_instr *code, gpir_instr *instr) +{ + gpir_node *node = instr->slots[GPIR_INSTR_SLOT_MUL0]; + + if (!node) { + code->mul0_src0 = gpir_codegen_src_unused; + code->mul0_src1 = gpir_codegen_src_unused; + return; + } + + gpir_alu_node *alu = gpir_node_to_alu(node); + + switch (node->op) { + case gpir_op_mul: + code->mul0_src0 = gpir_get_alu_input(node, alu->children[0]); + code->mul0_src1 = gpir_get_alu_input(node, alu->children[1]); + if (code->mul0_src1 == gpir_codegen_src_p1_complex) { + /* Will get confused with gpir_codegen_src_ident, so need to swap inputs */ + code->mul0_src1 = code->mul0_src0; + code->mul0_src0 = gpir_codegen_src_p1_complex; + } + + code->mul0_neg = alu->dest_negate; + if (alu->children_negate[0]) + code->mul0_neg = !code->mul0_neg; + if (alu->children_negate[1]) + code->mul0_neg = !code->mul0_neg; + break; + + case gpir_op_neg: + code->mul0_neg = true; + case 
gpir_op_mov: + code->mul0_src0 = gpir_get_alu_input(node, alu->children[0]); + code->mul0_src1 = gpir_codegen_src_ident; + break; + + case gpir_op_complex1: + code->mul0_src0 = gpir_get_alu_input(node, alu->children[0]); + code->mul0_src1 = gpir_get_alu_input(node, alu->children[1]); + code->mul_op = gpir_codegen_mul_op_complex1; + break; + + case gpir_op_complex2: + code->mul0_src0 = gpir_get_alu_input(node, alu->children[0]); + code->mul0_src1 = code->mul0_src0; + code->mul_op = gpir_codegen_mul_op_complex2; + break; + + case gpir_op_select: + code->mul0_src0 = gpir_get_alu_input(node, alu->children[2]); + code->mul0_src1 = gpir_get_alu_input(node, alu->children[0]); + code->mul_op = gpir_codegen_mul_op_select; + break; + + default: + assert(0); + } +} + +static void gpir_codegen_mul1_slot(gpir_codegen_instr *code, gpir_instr *instr) +{ + gpir_node *node = instr->slots[GPIR_INSTR_SLOT_MUL1]; + + if (!node) { + code->mul1_src0 = gpir_codegen_src_unused; + code->mul1_src1 = gpir_codegen_src_unused; + return; + } + + gpir_alu_node *alu = gpir_node_to_alu(node); + + switch (node->op) { + case gpir_op_mul: + code->mul1_src0 = gpir_get_alu_input(node, alu->children[0]); + code->mul1_src1 = gpir_get_alu_input(node, alu->children[1]); + if (code->mul1_src1 == gpir_codegen_src_p1_complex) { + /* Will get confused with gpir_codegen_src_ident, so need to swap inputs */ + code->mul1_src1 = code->mul1_src0; + code->mul1_src0 = gpir_codegen_src_p1_complex; + } + + code->mul1_neg = alu->dest_negate; + if (alu->children_negate[0]) + code->mul1_neg = !code->mul1_neg; + if (alu->children_negate[1]) + code->mul1_neg = !code->mul1_neg; + break; + + case gpir_op_neg: + code->mul1_neg = true; + case gpir_op_mov: + code->mul1_src0 = gpir_get_alu_input(node, alu->children[0]); + code->mul1_src1 = gpir_codegen_src_ident; + break; + + case gpir_op_complex1: + code->mul1_src0 = gpir_get_alu_input(node, alu->children[0]); + code->mul1_src1 = gpir_get_alu_input(node, alu->children[2]); + break; + + case gpir_op_select: + code->mul1_src0 = gpir_get_alu_input(node, alu->children[1]); + code->mul1_src1 = gpir_codegen_src_unused; + break; + + default: + assert(0); + } +} + +static void gpir_codegen_add0_slot(gpir_codegen_instr *code, gpir_instr *instr) +{ + gpir_node *node = instr->slots[GPIR_INSTR_SLOT_ADD0]; + + if (!node) { + code->acc0_src0 = gpir_codegen_src_unused; + code->acc0_src1 = gpir_codegen_src_unused; + return; + } + + gpir_alu_node *alu = gpir_node_to_alu(node); + + switch (node->op) { + case gpir_op_add: + case gpir_op_min: + case gpir_op_max: + case gpir_op_lt: + case gpir_op_ge: + code->acc0_src0 = gpir_get_alu_input(node, alu->children[0]); + code->acc0_src1 = gpir_get_alu_input(node, alu->children[1]); + + code->acc0_src0_neg = alu->children_negate[0]; + code->acc0_src1_neg = alu->children_negate[1]; + + switch (node->op) { + case gpir_op_add: + code->acc_op = gpir_codegen_acc_op_add; + if (code->acc0_src1 == gpir_codegen_src_p1_complex) { + code->acc0_src1 = code->acc0_src0; + code->acc0_src0 = gpir_codegen_src_p1_complex; + + bool tmp = code->acc0_src0_neg; + code->acc0_src0_neg = code->acc0_src1_neg; + code->acc0_src1_neg = tmp; + } + break; + case gpir_op_min: + code->acc_op = gpir_codegen_acc_op_min; + break; + case gpir_op_max: + code->acc_op = gpir_codegen_acc_op_max; + break; + case gpir_op_lt: + code->acc_op = gpir_codegen_acc_op_lt; + break; + case gpir_op_ge: + code->acc_op = gpir_codegen_acc_op_ge; + break; + default: + assert(0); + } + + break; + + case gpir_op_floor: + case 
gpir_op_sign: + code->acc0_src0 = gpir_get_alu_input(node, alu->children[0]); + code->acc0_src0_neg = alu->children_negate[0]; + switch (node->op) { + case gpir_op_floor: + code->acc_op = gpir_codegen_acc_op_floor; + break; + case gpir_op_sign: + code->acc_op = gpir_codegen_acc_op_sign; + break; + default: + assert(0); + } + break; + + case gpir_op_neg: + code->acc0_src0_neg = true; + case gpir_op_mov: + code->acc_op = gpir_codegen_acc_op_add; + code->acc0_src0 = gpir_get_alu_input(node, alu->children[0]); + code->acc0_src1 = gpir_codegen_src_ident; + code->acc0_src1_neg = true; + break; + + default: + assert(0); + } +} + +static void gpir_codegen_add1_slot(gpir_codegen_instr *code, gpir_instr *instr) +{ + gpir_node *node = instr->slots[GPIR_INSTR_SLOT_ADD1]; + + if (!node) { + code->acc1_src0 = gpir_codegen_src_unused; + code->acc1_src1 = gpir_codegen_src_unused; + return; + } + + gpir_alu_node *alu = gpir_node_to_alu(node); + + switch (node->op) { + case gpir_op_add: + case gpir_op_min: + case gpir_op_max: + case gpir_op_lt: + case gpir_op_ge: + code->acc1_src0 = gpir_get_alu_input(node, alu->children[0]); + code->acc1_src1 = gpir_get_alu_input(node, alu->children[1]); + + code->acc1_src0_neg = alu->children_negate[0]; + code->acc1_src1_neg = alu->children_negate[1]; + + switch (node->op) { + case gpir_op_add: + code->acc_op = gpir_codegen_acc_op_add; + if (code->acc1_src1 == gpir_codegen_src_p1_complex) { + code->acc1_src1 = code->acc1_src0; + code->acc1_src0 = gpir_codegen_src_p1_complex; + + bool tmp = code->acc1_src0_neg; + code->acc1_src0_neg = code->acc1_src1_neg; + code->acc1_src1_neg = tmp; + } + break; + case gpir_op_min: + code->acc_op = gpir_codegen_acc_op_min; + break; + case gpir_op_max: + code->acc_op = gpir_codegen_acc_op_max; + break; + case gpir_op_lt: + code->acc_op = gpir_codegen_acc_op_lt; + break; + case gpir_op_ge: + code->acc_op = gpir_codegen_acc_op_ge; + break; + default: + assert(0); + } + + break; + + case gpir_op_floor: + case gpir_op_sign: + code->acc1_src0 = gpir_get_alu_input(node, alu->children[0]); + code->acc1_src0_neg = alu->children_negate[0]; + switch (node->op) { + case gpir_op_floor: + code->acc_op = gpir_codegen_acc_op_floor; + break; + case gpir_op_sign: + code->acc_op = gpir_codegen_acc_op_sign; + break; + default: + assert(0); + } + break; + + case gpir_op_neg: + code->acc1_src0_neg = true; + case gpir_op_mov: + code->acc_op = gpir_codegen_acc_op_add; + code->acc1_src0 = gpir_get_alu_input(node, alu->children[0]); + code->acc1_src1 = gpir_codegen_src_ident; + code->acc1_src1_neg = true; + break; + + default: + assert(0); + } +} + +static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *instr) +{ + gpir_node *node = instr->slots[GPIR_INSTR_SLOT_COMPLEX]; + + if (!node) { + code->complex_src = gpir_codegen_src_unused; + return; + } + + switch (node->op) { + case gpir_op_mov: + case gpir_op_rcp_impl: + case gpir_op_rsqrt_impl: + { + gpir_alu_node *alu = gpir_node_to_alu(node); + code->complex_src = gpir_get_alu_input(node, alu->children[0]); + break; + } + default: + assert(0); + } + + switch (node->op) { + case gpir_op_mov: + code->complex_op = gpir_codegen_complex_op_pass; + break; + case gpir_op_rcp_impl: + code->complex_op = gpir_codegen_complex_op_rcp; + break; + case gpir_op_rsqrt_impl: + code->complex_op = gpir_codegen_complex_op_rsqrt; + break; + default: + assert(0); + } +} + +static void gpir_codegen_pass_slot(gpir_codegen_instr *code, gpir_instr *instr) +{ + gpir_node *node = instr->slots[GPIR_INSTR_SLOT_PASS]; + + if 
(!node) { + code->pass_op = gpir_codegen_pass_op_pass; + code->pass_src = gpir_codegen_src_unused; + return; + } + + switch (node->op) { + case gpir_op_mov: + { + gpir_alu_node *alu = gpir_node_to_alu(node); + code->pass_src = gpir_get_alu_input(node, alu->children[0]); + code->pass_op = gpir_codegen_pass_op_pass; + break; + } + default: + assert(0); + } +} + +static void gpir_codegen_branch_slot(gpir_codegen_instr *code, gpir_instr *instr) +{ + gpir_node *node = instr->slots[GPIR_INSTR_SLOT_BRANCH]; + + if (!node) + return; + + assert(0); +} + +static void gpir_codegen_reg0_slot(gpir_codegen_instr *code, gpir_instr *instr) +{ + if (!instr->reg0_use_count) + return; + + code->register0_attribute = instr->reg0_is_attr; + code->register0_addr = instr->reg0_index; +} + +static void gpir_codegen_reg1_slot(gpir_codegen_instr *code, gpir_instr *instr) +{ + if (!instr->reg1_use_count) + return; + + code->register1_addr = instr->reg1_index; +} + +static void gpir_codegen_mem_slot(gpir_codegen_instr *code, gpir_instr *instr) +{ + if (!instr->mem_use_count) { + code->load_offset = gpir_codegen_load_off_none; + return; + } + + code->load_addr = instr->mem_index; + code->load_offset = gpir_codegen_load_off_none; +} + +static gpir_codegen_store_src gpir_get_store_input(gpir_node *node) +{ + static int slot_to_src[GPIR_INSTR_SLOT_NUM] = { + [GPIR_INSTR_SLOT_MUL0] = gpir_codegen_store_src_mul_0, + [GPIR_INSTR_SLOT_MUL1] = gpir_codegen_store_src_mul_1, + [GPIR_INSTR_SLOT_ADD0] = gpir_codegen_store_src_acc_0, + [GPIR_INSTR_SLOT_ADD1] = gpir_codegen_store_src_acc_1, + [GPIR_INSTR_SLOT_COMPLEX] = gpir_codegen_store_src_complex, + [GPIR_INSTR_SLOT_PASS] = gpir_codegen_store_src_pass, + [GPIR_INSTR_SLOT_BRANCH...GPIR_INSTR_SLOT_STORE3] = gpir_codegen_store_src_none, + }; + + gpir_store_node *store = gpir_node_to_store(node); + return slot_to_src[store->child->sched.pos]; +} + +static void gpir_codegen_store_slot(gpir_codegen_instr *code, gpir_instr *instr) +{ + + gpir_node *node = instr->slots[GPIR_INSTR_SLOT_STORE0]; + if (node) + code->store0_src_x = gpir_get_store_input(node); + else + code->store0_src_x = gpir_codegen_store_src_none; + + node = instr->slots[GPIR_INSTR_SLOT_STORE1]; + if (node) + code->store0_src_y = gpir_get_store_input(node); + else + code->store0_src_y = gpir_codegen_store_src_none; + + node = instr->slots[GPIR_INSTR_SLOT_STORE2]; + if (node) + code->store1_src_z = gpir_get_store_input(node); + else + code->store1_src_z = gpir_codegen_store_src_none; + + node = instr->slots[GPIR_INSTR_SLOT_STORE3]; + if (node) + code->store1_src_w = gpir_get_store_input(node); + else + code->store1_src_w = gpir_codegen_store_src_none; + + if (instr->store_content[0] == GPIR_INSTR_STORE_TEMP) { + code->store0_temporary = true; + code->unknown_1 = 12; + } + else { + code->store0_varying = instr->store_content[0] == GPIR_INSTR_STORE_VARYING; + code->store0_addr = instr->store_index[0]; + } + + if (instr->store_content[1] == GPIR_INSTR_STORE_TEMP) { + code->store1_temporary = true; + code->unknown_1 = 12; + } + else { + code->store1_varying = instr->store_content[1] == GPIR_INSTR_STORE_VARYING; + code->store1_addr = instr->store_index[1]; + } +} + +static void gpir_codegen(gpir_codegen_instr *code, gpir_instr *instr) +{ + gpir_codegen_mul0_slot(code, instr); + gpir_codegen_mul1_slot(code, instr); + + gpir_codegen_add0_slot(code, instr); + gpir_codegen_add1_slot(code, instr); + + gpir_codegen_complex_slot(code, instr); + gpir_codegen_pass_slot(code, instr); + gpir_codegen_branch_slot(code, instr); + + 
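+   /* The remaining slots describe the non-ALU parts of the bundle:
+    * reg0/reg1 encode the attribute or register-file reads feeding the
+    * REG*_LOAD source slots, the mem slot encodes the load address for
+    * the uniform/temporary read, and the store slot routes each unit's
+    * result to its varying/register/temporary destination. */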
gpir_codegen_reg0_slot(code, instr); + gpir_codegen_reg1_slot(code, instr); + gpir_codegen_mem_slot(code, instr); + + gpir_codegen_store_slot(code, instr); +} + +static void gpir_codegen_print_prog(gpir_compiler *comp) +{ + uint32_t *data = comp->prog->shader; + int size = comp->prog->shader_size; + int num_instr = size / sizeof(gpir_codegen_instr); + int num_dword_per_instr = sizeof(gpir_codegen_instr) / sizeof(uint32_t); + + for (int i = 0; i < num_instr; i++) { + printf("%03d: ", i); + for (int j = 0; j < num_dword_per_instr; j++) + printf("%08x ", data[i * num_dword_per_instr + j]); + printf("\n"); + } +} + +bool gpir_codegen_prog(gpir_compiler *comp) +{ + int num_instr = 0; + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + num_instr += list_length(&block->instr_list); + } + + gpir_codegen_instr *code = rzalloc_array(comp->prog, gpir_codegen_instr, num_instr); + if (!code) + return false; + + int instr_index = 0; + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry(gpir_instr, instr, &block->instr_list, list) { + gpir_codegen(code + instr_index, instr); + instr_index++; + } + } + + for (int i = 0; i < num_instr; i++) { + if (code[i].register0_attribute) + comp->prog->prefetch = i; + } + + comp->prog->shader = code; + comp->prog->shader_size = num_instr * sizeof(gpir_codegen_instr); + + if (lima_debug & LIMA_DEBUG_GP) { + gpir_codegen_print_prog(comp); + gpir_disassemble_program(code, num_instr); + } + + return true; +} + +static gpir_codegen_acc_op gpir_codegen_get_acc_op(gpir_op op) +{ + switch (op) { + case gpir_op_add: + case gpir_op_neg: + case gpir_op_mov: + return gpir_codegen_acc_op_add; + case gpir_op_min: + return gpir_codegen_acc_op_min; + case gpir_op_max: + return gpir_codegen_acc_op_max; + case gpir_op_lt: + return gpir_codegen_acc_op_lt; + case gpir_op_ge: + return gpir_codegen_acc_op_ge; + case gpir_op_floor: + return gpir_codegen_acc_op_floor; + case gpir_op_sign: + return gpir_codegen_acc_op_sign; + default: + assert(0); + } + return -1; +} + +bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2) +{ + return gpir_codegen_get_acc_op(op1) == gpir_codegen_get_acc_op(op2); +} diff --git a/src/gallium/drivers/lima/ir/gp/codegen.h b/src/gallium/drivers/lima/ir/gp/codegen.h new file mode 100644 index 00000000000..d24b31b41f7 --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/codegen.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2017 Lima Project + * Copyright (c) 2013 Ben Brewer (ben.brewer@codethink.co.uk) + * Copyright (c) 2013 Connor Abbott (connor@abbott.cx) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef LIMA_IR_GP_CODEGEN_H +#define LIMA_IR_GP_CODEGEN_H + +typedef enum { + gpir_codegen_src_attrib_x = 0, + gpir_codegen_src_attrib_y = 1, + gpir_codegen_src_attrib_z = 2, + gpir_codegen_src_attrib_w = 3, + gpir_codegen_src_register_x = 4, + gpir_codegen_src_register_y = 5, + gpir_codegen_src_register_z = 6, + gpir_codegen_src_register_w = 7, + gpir_codegen_src_unknown_0 = 8, + gpir_codegen_src_unknown_1 = 9, + gpir_codegen_src_unknown_2 = 10, + gpir_codegen_src_unknown_3 = 11, + gpir_codegen_src_load_x = 12, + gpir_codegen_src_load_y = 13, + gpir_codegen_src_load_z = 14, + gpir_codegen_src_load_w = 15, + gpir_codegen_src_p1_acc_0 = 16, + gpir_codegen_src_p1_acc_1 = 17, + gpir_codegen_src_p1_mul_0 = 18, + gpir_codegen_src_p1_mul_1 = 19, + gpir_codegen_src_p1_pass = 20, + gpir_codegen_src_unused = 21, + gpir_codegen_src_ident = 22, + gpir_codegen_src_p1_complex = 22, + gpir_codegen_src_p2_pass = 23, + gpir_codegen_src_p2_acc_0 = 24, + gpir_codegen_src_p2_acc_1 = 25, + gpir_codegen_src_p2_mul_0 = 26, + gpir_codegen_src_p2_mul_1 = 27, + gpir_codegen_src_p1_attrib_x = 28, + gpir_codegen_src_p1_attrib_y = 29, + gpir_codegen_src_p1_attrib_z = 30, + gpir_codegen_src_p1_attrib_w = 31, +} gpir_codegen_src; + +typedef enum { + gpir_codegen_load_off_ld_addr_0 = 1, + gpir_codegen_load_off_ld_addr_1 = 2, + gpir_codegen_load_off_ld_addr_2 = 3, + gpir_codegen_load_off_none = 7, +} gpir_codegen_load_off; + +typedef enum { + gpir_codegen_store_src_acc_0 = 0, + gpir_codegen_store_src_acc_1 = 1, + gpir_codegen_store_src_mul_0 = 2, + gpir_codegen_store_src_mul_1 = 3, + gpir_codegen_store_src_pass = 4, + gpir_codegen_store_src_unknown = 5, + gpir_codegen_store_src_complex = 6, + gpir_codegen_store_src_none = 7, +} gpir_codegen_store_src; + +typedef enum { + gpir_codegen_acc_op_add = 0, + gpir_codegen_acc_op_floor = 1, + gpir_codegen_acc_op_sign = 2, + gpir_codegen_acc_op_ge = 4, + gpir_codegen_acc_op_lt = 5, + gpir_codegen_acc_op_min = 6, + gpir_codegen_acc_op_max = 7, +} gpir_codegen_acc_op; + +typedef enum { + gpir_codegen_complex_op_nop = 0, + gpir_codegen_complex_op_exp2 = 2, + gpir_codegen_complex_op_log2 = 3, + gpir_codegen_complex_op_rsqrt = 4, + gpir_codegen_complex_op_rcp = 5, + gpir_codegen_complex_op_pass = 9, + gpir_codegen_complex_op_temp_store_addr = 12, + gpir_codegen_complex_op_temp_load_addr_0 = 13, + gpir_codegen_complex_op_temp_load_addr_1 = 14, + gpir_codegen_complex_op_temp_load_addr_2 = 15, +} gpir_codegen_complex_op; + +typedef enum { + gpir_codegen_mul_op_mul = 0, + gpir_codegen_mul_op_complex1 = 1, + gpir_codegen_mul_op_complex2 = 3, + gpir_codegen_mul_op_select = 4, +} gpir_codegen_mul_op; + +typedef enum { + gpir_codegen_pass_op_pass = 2, + gpir_codegen_pass_op_preexp2 = 4, + gpir_codegen_pass_op_postlog2 = 5, + gpir_codegen_pass_op_clamp = 6, +} gpir_codegen_pass_op; + + +typedef struct __attribute__((__packed__)) { + gpir_codegen_src mul0_src0 : 5; + gpir_codegen_src mul0_src1 : 5; + gpir_codegen_src mul1_src0 : 5; + gpir_codegen_src mul1_src1 : 5; + bool mul0_neg : 1; + bool mul1_neg : 1; + gpir_codegen_src acc0_src0 : 5; + gpir_codegen_src acc0_src1 : 5; + gpir_codegen_src acc1_src0 : 5; + gpir_codegen_src acc1_src1 : 5; + bool acc0_src0_neg : 1; + bool acc0_src1_neg : 1; + bool acc1_src0_neg : 
1; + bool acc1_src1_neg : 1; + unsigned load_addr : 9; + gpir_codegen_load_off load_offset : 3; + unsigned register0_addr : 4; + bool register0_attribute : 1; + unsigned register1_addr : 4; + bool store0_temporary : 1; + bool store1_temporary : 1; + bool branch : 1; + bool branch_target_lo : 1; + gpir_codegen_store_src store0_src_x : 3; + gpir_codegen_store_src store0_src_y : 3; + gpir_codegen_store_src store1_src_z : 3; + gpir_codegen_store_src store1_src_w : 3; + gpir_codegen_acc_op acc_op : 3; + gpir_codegen_complex_op complex_op : 4; + unsigned store0_addr : 4; + bool store0_varying : 1; + unsigned store1_addr : 4; + bool store1_varying : 1; + gpir_codegen_mul_op mul_op : 3; + gpir_codegen_pass_op pass_op : 3; + gpir_codegen_src complex_src : 5; + gpir_codegen_src pass_src : 5; + unsigned unknown_1 : 4; /* 12: tmp_st, 13: branch */ + unsigned branch_target : 8; +} gpir_codegen_instr; + +void gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr); + +#endif diff --git a/src/gallium/drivers/lima/ir/gp/disasm.c b/src/gallium/drivers/lima/ir/gp/disasm.c new file mode 100644 index 00000000000..41a04942ec9 --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/disasm.c @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2018 Lima Project + * + * Copyright (c) 2013 Codethink (http://www.codethink.co.uk) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "gpir.h" +#include "codegen.h" + +typedef enum { + unit_acc_0, + unit_acc_1, + unit_mul_0, + unit_mul_1, + unit_pass, + unit_complex, + num_units +} gp_unit; + +static const gpir_codegen_store_src gp_unit_to_store_src[num_units] = { + [unit_acc_0] = gpir_codegen_store_src_acc_0, + [unit_acc_1] = gpir_codegen_store_src_acc_1, + [unit_mul_0] = gpir_codegen_store_src_mul_0, + [unit_mul_1] = gpir_codegen_store_src_mul_1, + [unit_pass] = gpir_codegen_store_src_pass, + [unit_complex] = gpir_codegen_store_src_complex, +}; + +static void +print_dest(gpir_codegen_instr *instr, gp_unit unit, unsigned cur_dest_index) +{ + printf("^%u", cur_dest_index + unit); + + gpir_codegen_store_src src = gp_unit_to_store_src[unit]; + + if (instr->store0_src_x == src || + instr->store0_src_y == src) { + if (instr->store0_temporary) { + /* Temporary stores ignore the address, and always use whatever's + * stored in address register 0. 
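+          * That address register is in turn written through the complex
+          * unit's temp_store_addr op (disassembled as /addr0 below).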
+ */ + printf("/t[addr0]"); + } else { + if (instr->store0_varying) + printf("/v"); + else + printf("/$"); + printf("%u", instr->store0_addr); + } + + printf("."); + if (instr->store0_src_x == src) + printf("x"); + if (instr->store0_src_y == src) + printf("y"); + } + + if (instr->store1_src_z == src || + instr->store1_src_w == src) { + if (instr->store1_temporary) { + printf("/t[addr0]"); + } else { + if (instr->store1_varying) + printf("/v"); + else + printf("/$"); + printf("%u", instr->store1_addr); + } + + printf("."); + if (instr->store1_src_z == src) + printf("z"); + if (instr->store1_src_w == src) + printf("w"); + } + + if (unit == unit_complex) { + switch (instr->complex_op) { + case gpir_codegen_complex_op_temp_store_addr: + printf("/addr0"); + break; + case gpir_codegen_complex_op_temp_load_addr_0: + printf("/addr1"); + break; + case gpir_codegen_complex_op_temp_load_addr_1: + printf("/addr2"); + break; + case gpir_codegen_complex_op_temp_load_addr_2: + printf("/addr3"); + break; + default: + break; + } + } +} + +static void +print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num, + gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, + unsigned cur_dest_index) +{ + switch (src) { + case gpir_codegen_src_attrib_x: + case gpir_codegen_src_attrib_y: + case gpir_codegen_src_attrib_z: + case gpir_codegen_src_attrib_w: + printf("%c%d.%c", instr->register0_attribute ? 'a' : '$', + instr->register0_addr, "xyzw"[src - gpir_codegen_src_attrib_x]); + break; + + case gpir_codegen_src_register_x: + case gpir_codegen_src_register_y: + case gpir_codegen_src_register_z: + case gpir_codegen_src_register_w: + printf("$%d.%c", instr->register1_addr, + "xyzw"[src - gpir_codegen_src_register_x]); + break; + + case gpir_codegen_src_unknown_0: + case gpir_codegen_src_unknown_1: + case gpir_codegen_src_unknown_2: + case gpir_codegen_src_unknown_3: + printf("unknown%d", src - gpir_codegen_src_unknown_0); + break; + + case gpir_codegen_src_load_x: + case gpir_codegen_src_load_y: + case gpir_codegen_src_load_z: + case gpir_codegen_src_load_w: + printf("t[%d", instr->load_addr); + switch (instr->load_offset) { + case gpir_codegen_load_off_ld_addr_0: + printf("+addr1"); + break; + case gpir_codegen_load_off_ld_addr_1: + printf("+addr2"); + break; + case gpir_codegen_load_off_ld_addr_2: + printf("+addr3"); + break; + case gpir_codegen_load_off_none: + break; + default: + printf("+unk%d", instr->load_offset); + } + printf("].%c", "xyzw"[src - gpir_codegen_src_load_x]); + break; + + case gpir_codegen_src_p1_acc_0: + printf("^%d", cur_dest_index - 1 * num_units + unit_acc_0); + break; + + case gpir_codegen_src_p1_acc_1: + printf("^%d", cur_dest_index - 1 * num_units + unit_acc_1); + break; + + case gpir_codegen_src_p1_mul_0: + printf("^%d", cur_dest_index - 1 * num_units + unit_mul_0); + break; + + case gpir_codegen_src_p1_mul_1: + printf("^%d", cur_dest_index - 1 * num_units + unit_mul_1); + break; + + case gpir_codegen_src_p1_pass: + printf("^%d", cur_dest_index - 1 * num_units + unit_pass); + break; + + case gpir_codegen_src_unused: + printf("unused"); + break; + + case gpir_codegen_src_p1_complex: /* Also ident */ + switch (unit) { + case unit_acc_0: + case unit_acc_1: + if (unit_src_num == 1) { + printf("0"); + return; + } + break; + case unit_mul_0: + case unit_mul_1: + if (unit_src_num == 1) { + printf("1"); + return; + } + break; + default: + break; + } + printf("^%d", cur_dest_index - 1 * num_units + unit_complex); + break; + + case gpir_codegen_src_p2_pass: + printf("^%d", 
cur_dest_index - 2 * num_units + unit_pass); + break; + + case gpir_codegen_src_p2_acc_0: + printf("^%d", cur_dest_index - 2 * num_units + unit_acc_0); + break; + + case gpir_codegen_src_p2_acc_1: + printf("^%d", cur_dest_index - 2 * num_units + unit_acc_1); + break; + + case gpir_codegen_src_p2_mul_0: + printf("^%d", cur_dest_index - 2 * num_units + unit_mul_0); + break; + + case gpir_codegen_src_p2_mul_1: + printf("^%d", cur_dest_index - 2 * num_units + unit_mul_1); + break; + + case gpir_codegen_src_p1_attrib_x: + case gpir_codegen_src_p1_attrib_y: + case gpir_codegen_src_p1_attrib_z: + case gpir_codegen_src_p1_attrib_w: + printf("%c%d.%c", prev_instr->register0_attribute ? 'a' : '$', + prev_instr->register0_addr, + "xyzw"[src - gpir_codegen_src_attrib_x]); + break; + } +} + +static void +print_mul(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, + unsigned cur_dest_index) +{ + switch (instr->mul_op) { + case gpir_codegen_mul_op_mul: + case gpir_codegen_mul_op_complex2: + if (instr->mul0_src0 != gpir_codegen_src_unused && + instr->mul0_src1 != gpir_codegen_src_unused) { + if (instr->mul0_src1 == gpir_codegen_src_ident && + !instr->mul0_neg) { + printf("mov "); + print_dest(instr, unit_mul_0, cur_dest_index); + printf(" "); + print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr, + cur_dest_index); + } else { + if (instr->mul_op == gpir_codegen_mul_op_complex2) + printf("complex2 "); + else + printf("mul "); + + print_dest(instr, unit_mul_0, cur_dest_index); + printf(" "); + print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr, + cur_dest_index); + printf(" "); + if (instr->mul0_neg) + printf("-"); + print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr, + cur_dest_index); + } + + printf(", "); + } + + if (instr->mul1_src0 != gpir_codegen_src_unused && + instr->mul1_src1 != gpir_codegen_src_unused) { + if (instr->mul1_src1 == gpir_codegen_src_ident && + !instr->mul1_neg) { + printf("mov "); + print_dest(instr, unit_mul_1, cur_dest_index); + printf(" "); + print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr, + cur_dest_index); + } else { + printf("mul "); + print_dest(instr, unit_mul_1, cur_dest_index); + printf(" "); + print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr, + cur_dest_index); + printf(" "); + if (instr->mul1_neg) + printf("-"); + print_src(instr->mul1_src1, unit_mul_0, 1, instr, prev_instr, + cur_dest_index); + } + } + + break; + case gpir_codegen_mul_op_complex1: + printf("complex1 "); + print_dest(instr, unit_mul_0, cur_dest_index); + printf(" "); + print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr, + cur_dest_index); + printf(" "); + print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr, + cur_dest_index); + printf(" "); + print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr, + cur_dest_index); + printf(" "); + print_src(instr->mul1_src1, unit_mul_1, 1, instr, prev_instr, + cur_dest_index); + break; + + case gpir_codegen_mul_op_select: + printf("sel "); + print_dest(instr, unit_mul_0, cur_dest_index); + printf(" "); + print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr, + cur_dest_index); + printf(" "); + print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr, + cur_dest_index); + printf(" "); + print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr, + cur_dest_index); + break; + + default: + printf("unknown%u ", instr->mul_op); + print_dest(instr, unit_mul_0, cur_dest_index); + printf(" "); + print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr, + cur_dest_index); + 
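+      /* Unrecognized mul_op (only 0, 1, 3 and 4 are known); keep dumping
+       * all four multiplier operands so the raw encoding stays visible. */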
printf(" "); + print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr, + cur_dest_index); + printf(" "); + print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr, + cur_dest_index); + printf(" "); + print_src(instr->mul1_src1, unit_mul_1, 1, instr, prev_instr, + cur_dest_index); + break; + } + + printf(", "); +} + +typedef struct { + const char *name; + unsigned srcs; +} acc_op_info; + +#define CASE(_name, _srcs) \ + [gpir_codegen_acc_op_##_name] = { \ + .name = #_name, \ + .srcs = _srcs \ + } + +static const acc_op_info acc_op_infos[8] = { + CASE(add, 2), + CASE(floor, 1), + CASE(sign, 1), + CASE(ge, 2), + CASE(lt, 2), + CASE(min, 2), + CASE(max, 2), +}; + +#undef CASE + +static void +print_acc(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, + unsigned cur_dest_index) +{ + const acc_op_info op = acc_op_infos[instr->acc_op]; + + if (instr->acc0_src0 != gpir_codegen_src_unused && + instr->acc0_src1 != gpir_codegen_src_unused) { + acc_op_info acc0_op = op; + if (instr->acc0_src1 == gpir_codegen_src_ident && + instr->acc0_src1_neg) { + /* add x, -0 -> mov x */ + acc0_op.name = "mov"; + acc0_op.srcs = 1; + } + + if (acc0_op.name) + printf("%s ", acc0_op.name); + else + printf("op%u ", instr->acc_op); + + print_dest(instr, unit_acc_0, cur_dest_index); + printf(" "); + if (instr->acc0_src0_neg) + printf("-"); + print_src(instr->acc0_src0, unit_acc_0, 0, instr, prev_instr, + cur_dest_index); + if (acc0_op.srcs > 1) { + printf(" "); + if (instr->acc0_src1_neg) + printf("-"); + print_src(instr->acc0_src1, unit_acc_0, 1, instr, prev_instr, + cur_dest_index); + } + + printf(", "); + } + + if (instr->acc1_src0 != gpir_codegen_src_unused && + instr->acc1_src1 != gpir_codegen_src_unused) { + acc_op_info acc1_op = op; + if (instr->acc1_src1 == gpir_codegen_src_ident && + instr->acc1_src1_neg) { + /* add x, -0 -> mov x */ + acc1_op.name = "mov"; + acc1_op.srcs = 1; + } + + if (acc1_op.name) + printf("%s ", acc1_op.name); + else + printf("op%u ", instr->acc_op); + + print_dest(instr, unit_acc_1, cur_dest_index); + printf(" "); + if (instr->acc1_src0_neg) + printf("-"); + print_src(instr->acc1_src0, unit_acc_1, 0, instr, prev_instr, + cur_dest_index); + if (acc1_op.srcs > 1) { + printf(" "); + if (instr->acc1_src1_neg) + printf("-"); + print_src(instr->acc1_src1, unit_acc_1, 1, instr, prev_instr, + cur_dest_index); + } + + printf(", "); + } +} + +static void +print_pass(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, + unsigned cur_dest_index) +{ + if (instr->pass_src == gpir_codegen_src_unused) + return; + + switch (instr->pass_op) { + case gpir_codegen_pass_op_pass: + printf("mov "); + break; + case gpir_codegen_pass_op_preexp2: + printf("preexp2 "); + break; + case gpir_codegen_pass_op_postlog2: + printf("postlog2 "); + break; + case gpir_codegen_pass_op_clamp: + printf("clamp "); + break; + default: + printf("unk%u ", instr->pass_op); + } + + print_dest(instr, unit_pass, cur_dest_index); + printf(" "); + print_src(instr->pass_src, unit_pass, 0, instr, prev_instr, + cur_dest_index); + + if (instr->pass_op == gpir_codegen_pass_op_clamp) { + printf(" "); + print_src(gpir_codegen_src_load_x, unit_pass, 1, instr, prev_instr, + cur_dest_index); + printf(" "); + print_src(gpir_codegen_src_load_y, unit_pass, 2, instr, prev_instr, + cur_dest_index); + } + + printf(", "); +} + +static void +print_complex(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, + unsigned cur_dest_index) +{ + if (instr->complex_src == gpir_codegen_src_unused) + return; + + switch 
(instr->complex_op) { + case gpir_codegen_complex_op_nop: + return; + + case gpir_codegen_complex_op_exp2: + printf("exp2 "); + break; + case gpir_codegen_complex_op_log2: + printf("log2 "); + break; + case gpir_codegen_complex_op_rsqrt: + printf("rsqrt "); + break; + case gpir_codegen_complex_op_rcp: + printf("rcp "); + break; + case gpir_codegen_complex_op_pass: + case gpir_codegen_complex_op_temp_store_addr: + case gpir_codegen_complex_op_temp_load_addr_0: + case gpir_codegen_complex_op_temp_load_addr_1: + case gpir_codegen_complex_op_temp_load_addr_2: + printf("mov "); + break; + default: + printf("unk%u ", instr->complex_op); + } + + print_dest(instr, unit_complex, cur_dest_index); + printf(" "); + print_src(instr->complex_src, unit_complex, 0, instr, prev_instr, + cur_dest_index); + printf(", "); +} + +static void +print_instr(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, + unsigned instr_number, unsigned cur_dest_index) +{ + printf("%03d: ", instr_number); + print_mul(instr, prev_instr, cur_dest_index); + print_acc(instr, prev_instr, cur_dest_index); + print_complex(instr, prev_instr, cur_dest_index); + print_pass(instr, prev_instr, cur_dest_index); + + if (instr->branch) { + /* The branch condition is taken from the current pass unit result */ + printf("branch ^%d %03d, ", cur_dest_index + unit_pass, + instr->branch_target + (instr->branch_target_lo ? 0 : 0x100)); + } + + if (instr->unknown_1 != 0) + printf("unknown_1 %u", instr->unknown_1); + + printf("\n"); +} + +void +gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr) +{ + printf("=======disassembly:=======\n"); + + unsigned cur_dest_index = 0; + unsigned cur_instr = 0; + for (gpir_codegen_instr *instr = code; cur_instr < num_instr; + instr++, cur_instr++, cur_dest_index += num_units) { + print_instr(instr, instr - 1, cur_instr, cur_dest_index); + } +} + diff --git a/src/gallium/drivers/lima/ir/gp/gpir.h b/src/gallium/drivers/lima/ir/gp/gpir.h new file mode 100644 index 00000000000..47e4422cd83 --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/gpir.h @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2017 Lima Project + * Copyright (c) 2013 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ + +#ifndef LIMA_IR_GP_GPIR_H +#define LIMA_IR_GP_GPIR_H + +#include "util/list.h" +#include "util/u_math.h" + +#include "ir/lima_ir.h" + +/* list of operations that a node can do. 
*/ +typedef enum { + gpir_op_mov, + + /* mul ops */ + gpir_op_mul, + gpir_op_select, + gpir_op_complex1, + gpir_op_complex2, + + /* add ops */ + gpir_op_add, + gpir_op_floor, + gpir_op_sign, + gpir_op_ge, + gpir_op_lt, + gpir_op_min, + gpir_op_max, + gpir_op_abs, + gpir_op_not, + + /* mul/add ops */ + gpir_op_neg, + + /* passthrough ops */ + gpir_op_clamp_const, + gpir_op_preexp2, + gpir_op_postlog2, + + /* complex ops */ + gpir_op_exp2_impl, + gpir_op_log2_impl, + gpir_op_rcp_impl, + gpir_op_rsqrt_impl, + + /* load/store ops */ + gpir_op_load_uniform, + gpir_op_load_temp, + gpir_op_load_attribute, + gpir_op_load_reg, + gpir_op_store_temp, + gpir_op_store_reg, + gpir_op_store_varying, + gpir_op_store_temp_load_off0, + gpir_op_store_temp_load_off1, + gpir_op_store_temp_load_off2, + + /* branch */ + gpir_op_branch_cond, + + /* const (emulated) */ + gpir_op_const, + + /* emulated ops */ + gpir_op_exp2, + gpir_op_log2, + gpir_op_rcp, + gpir_op_rsqrt, + gpir_op_ceil, + gpir_op_exp, + gpir_op_log, + gpir_op_sin, + gpir_op_cos, + gpir_op_tan, + gpir_op_branch_uncond, + gpir_op_eq, + gpir_op_ne, + + /* auxiliary ops */ + gpir_op_dummy_f, + gpir_op_dummy_m, + + gpir_op_num, +} gpir_op; + +typedef enum { + gpir_node_type_alu, + gpir_node_type_const, + gpir_node_type_load, + gpir_node_type_store, + gpir_node_type_branch, +} gpir_node_type; + +typedef struct { + char *name; + bool dest_neg; + bool src_neg[4]; + int *slots; + gpir_node_type type; + bool spillless; + bool may_consume_two_slots; +} gpir_op_info; + +extern const gpir_op_info gpir_op_infos[]; + +typedef struct { + enum { + GPIR_DEP_INPUT, /* def is the input of use */ + GPIR_DEP_OFFSET, /* def is the offset of use (i.e. temp store) */ + GPIR_DEP_READ_AFTER_WRITE, + GPIR_DEP_WRITE_AFTER_READ, + GPIR_DEP_VREG_READ_AFTER_WRITE, + GPIR_DEP_VREG_WRITE_AFTER_READ, + } type; + + /* node execute before succ */ + struct gpir_node *pred; + /* node execute after pred */ + struct gpir_node *succ; + + /* for node pred_list */ + struct list_head pred_link; + /* for ndoe succ_list */ + struct list_head succ_link; +} gpir_dep; + +typedef struct gpir_node { + struct list_head list; + gpir_op op; + gpir_node_type type; + int index; + char name[16]; + bool printed; + struct gpir_block *block; + + /* for nodes relationship */ + /* for node who uses this node (successor) */ + struct list_head succ_list; + /* for node this node uses (predecessor) */ + struct list_head pred_list; + + /* for scheduler and regalloc */ + int value_reg; + union { + struct { + int instr; + int pos; + int dist; + int index; + bool ready; + bool inserted; + } sched; + struct { + int parent_index; + float reg_pressure; + int est; + bool scheduled; + } rsched; + struct { + float index; + struct gpir_node *last; + } vreg; + struct { + int index; + } preg; + }; +} gpir_node; + +typedef struct { + gpir_node node; + + gpir_node *children[3]; + bool children_negate[3]; + int num_child; + + bool dest_negate; +} gpir_alu_node; + +typedef struct { + gpir_node node; + union fi value; +} gpir_const_node; + +typedef struct { + int index; + struct list_head list; + + struct list_head defs_list; + struct list_head uses_list; + + int start, end; +} gpir_reg; + +typedef struct { + gpir_node node; + + unsigned index; + unsigned component; + + gpir_reg *reg; + struct list_head reg_link; +} gpir_load_node; + +typedef struct { + gpir_node node; + + unsigned index; + unsigned component; + gpir_node *child; + + gpir_reg *reg; + struct list_head reg_link; +} gpir_store_node; + +enum gpir_instr_slot { + 
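+   /* Slots of one GP instruction word: two multipliers, two accumulators
+    * (adders), the complex and pass units, a branch slot, two
+    * 4-component register/attribute loads, a 4-component memory load and
+    * four store slots.  codegen.c maps ALU sources and store inputs by
+    * these positions. */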
GPIR_INSTR_SLOT_MUL0, + GPIR_INSTR_SLOT_MUL1, + GPIR_INSTR_SLOT_ADD0, + GPIR_INSTR_SLOT_ADD1, + GPIR_INSTR_SLOT_PASS, + GPIR_INSTR_SLOT_COMPLEX, + GPIR_INSTR_SLOT_BRANCH, + GPIR_INSTR_SLOT_REG0_LOAD0, + GPIR_INSTR_SLOT_REG0_LOAD1, + GPIR_INSTR_SLOT_REG0_LOAD2, + GPIR_INSTR_SLOT_REG0_LOAD3, + GPIR_INSTR_SLOT_REG1_LOAD0, + GPIR_INSTR_SLOT_REG1_LOAD1, + GPIR_INSTR_SLOT_REG1_LOAD2, + GPIR_INSTR_SLOT_REG1_LOAD3, + GPIR_INSTR_SLOT_MEM_LOAD0, + GPIR_INSTR_SLOT_MEM_LOAD1, + GPIR_INSTR_SLOT_MEM_LOAD2, + GPIR_INSTR_SLOT_MEM_LOAD3, + GPIR_INSTR_SLOT_STORE0, + GPIR_INSTR_SLOT_STORE1, + GPIR_INSTR_SLOT_STORE2, + GPIR_INSTR_SLOT_STORE3, + GPIR_INSTR_SLOT_NUM, + GPIR_INSTR_SLOT_END, + GPIR_INSTR_SLOT_ALU_BEGIN = GPIR_INSTR_SLOT_MUL0, + GPIR_INSTR_SLOT_ALU_END = GPIR_INSTR_SLOT_COMPLEX, + GPIR_INSTR_SLOT_DIST_TWO_BEGIN = GPIR_INSTR_SLOT_MUL0, + GPIR_INSTR_SLOT_DIST_TWO_END = GPIR_INSTR_SLOT_PASS, +}; + +typedef struct { + int index; + struct list_head list; + + gpir_node *slots[GPIR_INSTR_SLOT_NUM]; + + int alu_num_slot_free; + int alu_num_slot_needed_by_store; + + int reg0_use_count; + bool reg0_is_attr; + int reg0_index; + + int reg1_use_count; + int reg1_index; + + int mem_use_count; + bool mem_is_temp; + int mem_index; + + enum { + GPIR_INSTR_STORE_NONE, + GPIR_INSTR_STORE_VARYING, + GPIR_INSTR_STORE_REG, + GPIR_INSTR_STORE_TEMP, + } store_content[2]; + int store_index[2]; +} gpir_instr; + +typedef struct gpir_block { + struct list_head list; + struct list_head node_list; + struct list_head instr_list; + struct gpir_compiler *comp; + + /* for scheduler */ + union { + struct { + int instr_index; + } sched; + struct { + int node_index; + } rsched; + }; +} gpir_block; + +typedef struct { + gpir_node node; + gpir_block *dest; +} gpir_branch_node; + +struct lima_vs_shader_state; + +typedef struct gpir_compiler { + struct list_head block_list; + int cur_index; + + /* array for searching ssa node */ + gpir_node **var_nodes; + + /* for physical reg */ + struct list_head reg_list; + int cur_reg; + + struct lima_vs_shader_state *prog; + int constant_base; +} gpir_compiler; + +#define GPIR_VALUE_REG_NUM 11 +#define GPIR_PHYSICAL_REG_NUM 64 + +void *gpir_node_create(gpir_block *block, gpir_op op); +gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type); +void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred); +void gpir_node_replace_succ(gpir_node *dst, gpir_node *src); +void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred); +void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, gpir_node *new_child); +void gpir_node_insert_child(gpir_node *parent, gpir_node *child, gpir_node *insert_child); +void gpir_node_delete(gpir_node *node); +void gpir_node_print_prog_dep(gpir_compiler *comp); +void gpir_node_print_prog_seq(gpir_compiler *comp); + +#define gpir_node_foreach_succ(node, dep) \ + list_for_each_entry(gpir_dep, dep, &node->succ_list, succ_link) +#define gpir_node_foreach_succ_safe(node, dep) \ + list_for_each_entry_safe(gpir_dep, dep, &node->succ_list, succ_link) +#define gpir_node_foreach_pred(node, dep) \ + list_for_each_entry(gpir_dep, dep, &node->pred_list, pred_link) +#define gpir_node_foreach_pred_safe(node, dep) \ + list_for_each_entry_safe(gpir_dep, dep, &node->pred_list, pred_link) + +static inline bool gpir_node_is_root(gpir_node *node) +{ + return list_empty(&node->succ_list); +} + +static inline bool gpir_node_is_leaf(gpir_node *node) +{ + return list_empty(&node->pred_list); +} + +#define gpir_node_to_alu(node) ((gpir_alu_node *)(node)) +#define 
gpir_node_to_const(node) ((gpir_const_node *)(node)) +#define gpir_node_to_load(node) ((gpir_load_node *)(node)) +#define gpir_node_to_store(node) ((gpir_store_node *)(node)) + +gpir_instr *gpir_instr_create(gpir_block *block); +bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node); +void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node); +void gpir_instr_print_prog(gpir_compiler *comp); + +bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2); + +bool gpir_pre_rsched_lower_prog(gpir_compiler *comp); +bool gpir_post_rsched_lower_prog(gpir_compiler *comp); +bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp); +bool gpir_value_regalloc_prog(gpir_compiler *comp); +bool gpir_physical_regalloc_prog(gpir_compiler *comp); +bool gpir_schedule_prog(gpir_compiler *comp); +bool gpir_codegen_prog(gpir_compiler *comp); + +gpir_reg *gpir_create_reg(gpir_compiler *comp); + +#endif diff --git a/src/gallium/drivers/lima/ir/gp/instr.c b/src/gallium/drivers/lima/ir/gp/instr.c new file mode 100644 index 00000000000..84736990608 --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/instr.c @@ -0,0 +1,488 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include + +#include "util/ralloc.h" + +#include "gpir.h" + +gpir_instr *gpir_instr_create(gpir_block *block) +{ + gpir_instr *instr = rzalloc(block, gpir_instr); + if (unlikely(!instr)) + return NULL; + + instr->index = block->sched.instr_index++; + instr->alu_num_slot_free = 6; + + list_add(&instr->list, &block->instr_list); + return instr; +} + +static gpir_node *gpir_instr_get_the_other_acc_node(gpir_instr *instr, int slot) +{ + if (slot == GPIR_INSTR_SLOT_ADD0) + return instr->slots[GPIR_INSTR_SLOT_ADD1]; + else if (slot == GPIR_INSTR_SLOT_ADD1) + return instr->slots[GPIR_INSTR_SLOT_ADD0]; + + return NULL; +} + +static bool gpir_instr_check_acc_same_op(gpir_instr *instr, gpir_node *node, int slot) +{ + /* two ACC slots must share the same op code */ + gpir_node *acc_node = gpir_instr_get_the_other_acc_node(instr, slot); + + /* spill move case may get acc_node == node */ + if (acc_node && acc_node != node && + !gpir_codegen_acc_same_op(node->op, acc_node->op)) + return false; + + return true; +} + +static int gpir_instr_get_consume_slot(gpir_instr *instr, gpir_node *node) +{ + if (gpir_op_infos[node->op].may_consume_two_slots) { + gpir_node *acc_node = gpir_instr_get_the_other_acc_node(instr, node->sched.pos); + if (acc_node) + /* at this point node must have the same acc op with acc_node, + * so it just consumes the extra slot acc_node consumed */ + return 0; + else + return 2; + } + else + return 1; +} + +static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node) +{ + if (!gpir_instr_check_acc_same_op(instr, node, node->sched.pos)) + return false; + + int consume_slot = gpir_instr_get_consume_slot(instr, node); + + /* check if this node is child of one store node. + * complex1 won't be any of this instr's store node's child, + * because it has two instr latency before store can use it. 
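+    *
+    * Worked example of the accounting below (illustrative numbers): with
+    * alu_num_slot_free == 2 and alu_num_slot_needed_by_store == 2, a store
+    * child that consumes one ALU slot still fits (2 - 1 >= 2 - 1), but a
+    * two-slot node does not (2 - 2 < 2 - 1) and a move node will be needed
+    * for it instead.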
+ */ + for (int i = GPIR_INSTR_SLOT_STORE0; i < GPIR_INSTR_SLOT_STORE3; i++) { + gpir_store_node *s = gpir_node_to_store(instr->slots[i]); + if (s && s->child == node) { + /* acc node may consume 2 slots, so even it's the child of a + * store node, it may not be inserted successfully, in which + * case we need a move node for it */ + if (instr->alu_num_slot_free - consume_slot < + instr->alu_num_slot_needed_by_store - 1) + return false; + + instr->alu_num_slot_needed_by_store--; + instr->alu_num_slot_free -= consume_slot; + return true; + } + } + + /* not a child of any store node, so must reserve alu slot for store node */ + if (instr->alu_num_slot_free - consume_slot < + instr->alu_num_slot_needed_by_store) + return false; + + instr->alu_num_slot_free -= consume_slot; + return true; +} + +static void gpir_instr_remove_alu(gpir_instr *instr, gpir_node *node) +{ + int consume_slot = gpir_instr_get_consume_slot(instr, node); + + for (int i = GPIR_INSTR_SLOT_STORE0; i < GPIR_INSTR_SLOT_STORE3; i++) { + gpir_store_node *s = gpir_node_to_store(instr->slots[i]); + if (s && s->child == node) { + instr->alu_num_slot_needed_by_store++; + instr->alu_num_slot_free += consume_slot; + return; + } + } + + instr->alu_num_slot_free += consume_slot; +} + +static bool gpir_instr_insert_reg0_check(gpir_instr *instr, gpir_node *node) +{ + gpir_load_node *load = gpir_node_to_load(node); + int i = node->sched.pos - GPIR_INSTR_SLOT_REG0_LOAD0; + + if (load->component != i) + return false; + + if (instr->reg0_is_attr && node->op != gpir_op_load_attribute) + return false; + + if (instr->reg0_use_count) { + if (instr->reg0_index != load->index) + return false; + } + else { + instr->reg0_is_attr = node->op == gpir_op_load_attribute; + instr->reg0_index = load->index; + } + + instr->reg0_use_count++; + return true; +} + +static void gpir_instr_remove_reg0(gpir_instr *instr, gpir_node *node) +{ + instr->reg0_use_count--; + if (!instr->reg0_use_count) + instr->reg0_is_attr = false; +} + +static bool gpir_instr_insert_reg1_check(gpir_instr *instr, gpir_node *node) +{ + gpir_load_node *load = gpir_node_to_load(node); + int i = node->sched.pos - GPIR_INSTR_SLOT_REG1_LOAD0; + + if (load->component != i) + return false; + + if (instr->reg1_use_count) { + if (instr->reg1_index != load->index) + return false; + } + else + instr->reg1_index = load->index; + + instr->reg1_use_count++; + return true; +} + +static void gpir_instr_remove_reg1(gpir_instr *instr, gpir_node *node) +{ + instr->reg1_use_count--; +} + +static bool gpir_instr_insert_mem_check(gpir_instr *instr, gpir_node *node) +{ + gpir_load_node *load = gpir_node_to_load(node); + int i = node->sched.pos - GPIR_INSTR_SLOT_MEM_LOAD0; + + if (load->component != i) + return false; + + if (instr->mem_is_temp && node->op != gpir_op_load_temp) + return false; + + if (instr->mem_use_count) { + if (instr->mem_index != load->index) + return false; + } + else { + instr->mem_is_temp = node->op == gpir_op_load_temp; + instr->mem_index = load->index; + } + + instr->mem_use_count++; + return true; +} + +static void gpir_instr_remove_mem(gpir_instr *instr, gpir_node *node) +{ + instr->mem_use_count--; + if (!instr->mem_use_count) + instr->mem_is_temp = false; +} + +static bool gpir_instr_insert_store_check(gpir_instr *instr, gpir_node *node) +{ + gpir_store_node *store = gpir_node_to_store(node); + int i = node->sched.pos - GPIR_INSTR_SLOT_STORE0; + + if (store->component != i) + return false; + + i >>= 1; + switch (instr->store_content[i]) { + case GPIR_INSTR_STORE_NONE: + /* store 
temp has only one address reg for two store unit */ + if (node->op == gpir_op_store_temp && + instr->store_content[!i] == GPIR_INSTR_STORE_TEMP && + instr->store_index[!i] != store->index) + return false; + break; + + case GPIR_INSTR_STORE_VARYING: + if (node->op != gpir_op_store_varying || + instr->store_index[i] != store->index) + return false; + break; + + case GPIR_INSTR_STORE_REG: + if (node->op != gpir_op_store_reg || + instr->store_index[i] != store->index) + return false; + break; + + case GPIR_INSTR_STORE_TEMP: + if (node->op != gpir_op_store_temp || + instr->store_index[i] != store->index) + return false; + break; + } + + /* check if any store node has the same child as this node */ + for (int j = GPIR_INSTR_SLOT_STORE0; j <= GPIR_INSTR_SLOT_STORE3; j++) { + gpir_store_node *s = gpir_node_to_store(instr->slots[j]); + if (s && s->child == store->child) + goto out; + } + + /* check if the child is alrady in this instr's alu slot, + * this may happen when store an scheduled alu node to reg + */ + for (int j = GPIR_INSTR_SLOT_ALU_BEGIN; j <= GPIR_INSTR_SLOT_ALU_END; j++) { + if (store->child == instr->slots[j]) + goto out; + } + + /* no store node has the same child as this node, and child is not + * already in this instr's alu slot, so instr must have some free + * alu slot to insert this node's child + */ + if (instr->alu_num_slot_free <= instr->alu_num_slot_needed_by_store) + return false; + + instr->alu_num_slot_needed_by_store++; + +out: + if (instr->store_content[i] == GPIR_INSTR_STORE_NONE) { + if (node->op == gpir_op_store_varying) + instr->store_content[i] = GPIR_INSTR_STORE_VARYING; + else if (node->op == gpir_op_store_reg) + instr->store_content[i] = GPIR_INSTR_STORE_REG; + else + instr->store_content[i] = GPIR_INSTR_STORE_TEMP; + + instr->store_index[i] = store->index; + } + return true; +} + +static void gpir_instr_remove_store(gpir_instr *instr, gpir_node *node) +{ + gpir_store_node *store = gpir_node_to_store(node); + int component = node->sched.pos - GPIR_INSTR_SLOT_STORE0; + int other_slot = GPIR_INSTR_SLOT_STORE0 + (component ^ 1); + + for (int j = GPIR_INSTR_SLOT_STORE0; j <= GPIR_INSTR_SLOT_STORE3; j++) { + gpir_store_node *s = gpir_node_to_store(instr->slots[j]); + if (s && s->child == store->child) + goto out; + } + + for (int j = GPIR_INSTR_SLOT_ALU_BEGIN; j <= GPIR_INSTR_SLOT_ALU_END; j++) { + if (store->child == instr->slots[j]) + goto out; + } + + instr->alu_num_slot_needed_by_store--; + +out: + if (!instr->slots[other_slot]) + instr->store_content[component >> 1] = GPIR_INSTR_STORE_NONE; +} + +static bool gpir_instr_spill_move(gpir_instr *instr, int slot, int spill_to_start) +{ + gpir_node *node = instr->slots[slot]; + if (!node) + return true; + + if (node->op != gpir_op_mov) + return false; + + for (int i = spill_to_start; i <= GPIR_INSTR_SLOT_DIST_TWO_END; i++) { + if (i != slot && !instr->slots[i] && + gpir_instr_check_acc_same_op(instr, node, i)) { + instr->slots[i] = node; + instr->slots[slot] = NULL; + node->sched.pos = i; + + gpir_debug("instr %d spill move %d from slot %d to %d\n", + instr->index, node->index, slot, i); + return true; + } + } + + return false; +} + +static bool gpir_instr_slot_free(gpir_instr *instr, gpir_node *node) +{ + if (node->op == gpir_op_mov || + node->sched.pos > GPIR_INSTR_SLOT_DIST_TWO_END) { + if (instr->slots[node->sched.pos]) + return false; + } + else { + /* for node needs dist two slot, if the slot has a move, we can + * spill it to other dist two slot without any side effect */ + int spill_to_start = 
GPIR_INSTR_SLOT_MUL0; + if (node->op == gpir_op_complex1 || node->op == gpir_op_select) + spill_to_start = GPIR_INSTR_SLOT_ADD0; + + if (!gpir_instr_spill_move(instr, node->sched.pos, spill_to_start)) + return false; + + if (node->op == gpir_op_complex1 || node->op == gpir_op_select) { + if (!gpir_instr_spill_move(instr, GPIR_INSTR_SLOT_MUL1, spill_to_start)) + return false; + } + } + + return true; +} + +bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node) +{ + if (!gpir_instr_slot_free(instr, node)) + return false; + + if (node->sched.pos >= GPIR_INSTR_SLOT_ALU_BEGIN && + node->sched.pos <= GPIR_INSTR_SLOT_ALU_END) { + if (!gpir_instr_insert_alu_check(instr, node)) + return false; + } + else if (node->sched.pos >= GPIR_INSTR_SLOT_REG0_LOAD0 && + node->sched.pos <= GPIR_INSTR_SLOT_REG0_LOAD3) { + if (!gpir_instr_insert_reg0_check(instr, node)) + return false; + } + else if (node->sched.pos >= GPIR_INSTR_SLOT_REG1_LOAD0 && + node->sched.pos <= GPIR_INSTR_SLOT_REG1_LOAD3) { + if (!gpir_instr_insert_reg1_check(instr, node)) + return false; + } + else if (node->sched.pos >= GPIR_INSTR_SLOT_MEM_LOAD0 && + node->sched.pos <= GPIR_INSTR_SLOT_MEM_LOAD3) { + if (!gpir_instr_insert_mem_check(instr, node)) + return false; + } + else if (node->sched.pos >= GPIR_INSTR_SLOT_STORE0 && + node->sched.pos <= GPIR_INSTR_SLOT_STORE3) { + if (!gpir_instr_insert_store_check(instr, node)) + return false; + } + + instr->slots[node->sched.pos] = node; + + if (node->op == gpir_op_complex1 || node->op == gpir_op_select) + instr->slots[GPIR_INSTR_SLOT_MUL1] = node; + + return true; +} + +void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node) +{ + if (node->sched.pos >= GPIR_INSTR_SLOT_ALU_BEGIN && + node->sched.pos <= GPIR_INSTR_SLOT_ALU_END) + gpir_instr_remove_alu(instr, node); + else if (node->sched.pos >= GPIR_INSTR_SLOT_REG0_LOAD0 && + node->sched.pos <= GPIR_INSTR_SLOT_REG0_LOAD3) + gpir_instr_remove_reg0(instr, node); + else if (node->sched.pos >= GPIR_INSTR_SLOT_REG1_LOAD0 && + node->sched.pos <= GPIR_INSTR_SLOT_REG1_LOAD3) + gpir_instr_remove_reg1(instr, node); + else if (node->sched.pos >= GPIR_INSTR_SLOT_MEM_LOAD0 && + node->sched.pos <= GPIR_INSTR_SLOT_MEM_LOAD3) + gpir_instr_remove_mem(instr, node); + else if (node->sched.pos >= GPIR_INSTR_SLOT_STORE0 && + node->sched.pos <= GPIR_INSTR_SLOT_STORE3) + gpir_instr_remove_store(instr, node); + + instr->slots[node->sched.pos] = NULL; + + if (node->op == gpir_op_complex1 || node->op == gpir_op_select) + instr->slots[GPIR_INSTR_SLOT_MUL1] = NULL; +} + +void gpir_instr_print_prog(gpir_compiler *comp) +{ + struct { + int len; + char *name; + } fields[] = { + [GPIR_INSTR_SLOT_MUL0] = { 4, "mul0" }, + [GPIR_INSTR_SLOT_MUL1] = { 4, "mul1" }, + [GPIR_INSTR_SLOT_ADD0] = { 4, "add0" }, + [GPIR_INSTR_SLOT_ADD1] = { 4, "add1" }, + [GPIR_INSTR_SLOT_REG0_LOAD3] = { 15, "load0" }, + [GPIR_INSTR_SLOT_REG1_LOAD3] = { 15, "load1" }, + [GPIR_INSTR_SLOT_MEM_LOAD3] = { 15, "load2" }, + [GPIR_INSTR_SLOT_BRANCH] = { 4, "bnch" }, + [GPIR_INSTR_SLOT_STORE3] = { 15, "store" }, + [GPIR_INSTR_SLOT_COMPLEX] = { 4, "cmpl" }, + [GPIR_INSTR_SLOT_PASS] = { 4, "pass" }, + }; + + printf("========prog instr========\n"); + printf(" "); + for (int i = 0; i < GPIR_INSTR_SLOT_NUM; i++) { + if (fields[i].len) + printf("%-*s ", fields[i].len, fields[i].name); + } + printf("\n"); + + int index = 0; + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry(gpir_instr, instr, &block->instr_list, list) { + printf("%03d: ", index++); + + char 
buff[16] = "null"; + int start = 0; + for (int j = 0; j < GPIR_INSTR_SLOT_NUM; j++) { + gpir_node *node = instr->slots[j]; + if (fields[j].len) { + if (node) + snprintf(buff + start, sizeof(buff) - start, "%d", node->index); + printf("%-*s ", fields[j].len, buff); + + strcpy(buff, "null"); + start = 0; + } + else { + if (node) + start += snprintf(buff + start, sizeof(buff) - start, "%d", node->index); + start += snprintf(buff + start, sizeof(buff) - start, "|"); + } + } + printf("\n"); + } + printf("-----------------------\n"); + } + printf("==========================\n"); +} diff --git a/src/gallium/drivers/lima/ir/gp/lower.c b/src/gallium/drivers/lima/ir/gp/lower.c new file mode 100644 index 00000000000..b52cb38347b --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/lower.c @@ -0,0 +1,529 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "util/ralloc.h" + +#include "gpir.h" +#include "lima_context.h" + +static gpir_node * +gpir_lower_create_insert_node(gpir_node *parent, gpir_node *child, + gpir_node *child2, gpir_op op) +{ + gpir_node *node = gpir_node_create(parent->block, op); + if (!node) + return NULL; + + gpir_alu_node *alu = gpir_node_to_alu(node); + alu->children[0] = child; + alu->children[1] = child2; + alu->num_child = 2; + gpir_node_insert_child(parent, child, node); + gpir_node_add_dep(node, child2, GPIR_DEP_INPUT); + list_addtail(&node->list, &parent->list); + return node; +} + +static bool gpir_lower_viewport_transform(gpir_compiler *comp) +{ + gpir_node *rcpw = NULL; + + /* rcpw = 1 / w */ + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry(gpir_node, node, &block->node_list, list) { + if (node->op == gpir_op_store_varying) { + gpir_store_node *store = gpir_node_to_store(node); + if (store->index == 0 && store->component == 3) { + gpir_node *w = store->child; + + rcpw = gpir_node_create(block, gpir_op_rcp); + if (!rcpw) + return false; + list_addtail(&rcpw->list, &node->list); + + gpir_alu_node *alu = gpir_node_to_alu(rcpw); + alu->children[0] = w; + alu->num_child = 1; + store->child = rcpw; + + gpir_node_insert_child(node, w, rcpw); + goto found; + } + } + } + } + +found: + assert(rcpw); + + /* xyz = xyz * rcpw * scale + transition */ + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry(gpir_node, node, &block->node_list, list) { + if (node->op == gpir_op_store_varying) { + gpir_store_node *store = gpir_node_to_store(node); + if (store->index == 0 && store->component < 3) { + gpir_node *xyz = store->child; + + gpir_node *mul1 = + gpir_lower_create_insert_node(node, xyz, rcpw, gpir_op_mul); + if (!mul1) + return false; + + gpir_load_node *scale = gpir_node_create(block, gpir_op_load_uniform); + if (!scale) + return false; + scale->index = comp->constant_base; + scale->component = store->component; + list_addtail(&scale->node.list, &node->list); + + gpir_node *mul2 = + gpir_lower_create_insert_node(node, mul1, &scale->node, gpir_op_mul); + if (!mul2) + return false; + + gpir_load_node *translate = gpir_node_create(block, gpir_op_load_uniform); + if (!translate) + return false; + translate->index = comp->constant_base + 1; + translate->component = store->component; + list_addtail(&translate->node.list, &node->list); + + gpir_node *add = + gpir_lower_create_insert_node(node, mul2, &translate->node, gpir_op_add); + if (!add) + return false; + + store->child = add; + } + } + } + } + + comp->constant_base += 2; + return true; +} + +static bool gpir_lower_const(gpir_compiler *comp) +{ + int num_constant = 0; + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { + if (node->op == gpir_op_const) { + if (gpir_node_is_root(node)) + gpir_node_delete(node); + else + num_constant++; + } + } + } + + if (num_constant) { + union fi *constant = ralloc_array(comp->prog, union fi, num_constant); + if (!constant) + return false; + + comp->prog->constant = constant; + comp->prog->constant_size = num_constant * sizeof(union fi); + + int index = 0; + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { + if (node->op == gpir_op_const) { + gpir_const_node *c = gpir_node_to_const(node); + + if (!gpir_node_is_root(node)) { + gpir_load_node *load = gpir_node_create(block, 
gpir_op_load_uniform); + if (unlikely(!load)) + return false; + + load->index = comp->constant_base + (index >> 2); + load->component = index % 4; + constant[index++] = c->value; + + gpir_node_replace_succ(&load->node, node); + + list_addtail(&load->node.list, &node->list); + + gpir_debug("lower const create uniform %d for const %d\n", + load->node.index, node->index); + } + + gpir_node_delete(node); + } + } + } + } + + return true; +} + +/* duplicate load to all its successors */ +static bool gpir_lower_load(gpir_compiler *comp) +{ + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { + if (node->type == gpir_node_type_load) { + gpir_load_node *load = gpir_node_to_load(node); + + bool first = true; + gpir_node_foreach_succ_safe(node, dep) { + gpir_node *succ = dep->succ; + + if (first) { + first = false; + continue; + } + + gpir_node *new = gpir_node_create(succ->block, node->op); + if (unlikely(!new)) + return false; + list_addtail(&new->list, &succ->list); + + gpir_debug("lower load create %d from %d for succ %d\n", + new->index, node->index, succ->index); + + gpir_load_node *nload = gpir_node_to_load(new); + nload->index = load->index; + nload->component = load->component; + if (load->reg) { + nload->reg = load->reg; + list_addtail(&nload->reg_link, &load->reg->uses_list); + } + + gpir_node_replace_pred(dep, new); + gpir_node_replace_child(succ, node, new); + } + } + } + } + + return true; +} + +static bool gpir_lower_neg(gpir_block *block, gpir_node *node) +{ + gpir_alu_node *neg = gpir_node_to_alu(node); + gpir_node *child = neg->children[0]; + + /* check if child can dest negate */ + if (child->type == gpir_node_type_alu) { + /* negate must be its only successor */ + if (list_is_singular(&child->succ_list) && + gpir_op_infos[child->op].dest_neg) { + gpir_alu_node *alu = gpir_node_to_alu(child); + alu->dest_negate = !alu->dest_negate; + + gpir_node_replace_succ(child, node); + gpir_node_delete(node); + return true; + } + } + + /* check if child can src negate */ + gpir_node_foreach_succ_safe(node, dep) { + gpir_node *succ = dep->succ; + if (succ->type != gpir_node_type_alu) + continue; + + bool success = true; + gpir_alu_node *alu = gpir_node_to_alu(dep->succ); + for (int i = 0; i < alu->num_child; i++) { + if (alu->children[i] == node) { + if (gpir_op_infos[succ->op].src_neg[i]) { + alu->children_negate[i] = !alu->children_negate[i]; + alu->children[i] = child; + } + else + success = false; + } + } + + if (success) + gpir_node_replace_pred(dep, child); + } + + if (gpir_node_is_root(node)) + gpir_node_delete(node); + + return true; +} + +static bool gpir_lower_complex(gpir_block *block, gpir_node *node) +{ + gpir_alu_node *alu = gpir_node_to_alu(node); + gpir_node *child = alu->children[0]; + + gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2); + if (unlikely(!complex2)) + return false; + + complex2->children[0] = child; + complex2->num_child = 1; + gpir_node_add_dep(&complex2->node, child, GPIR_DEP_INPUT); + list_addtail(&complex2->node.list, &node->list); + + int impl_op = 0; + switch (node->op) { + case gpir_op_rcp: + impl_op = gpir_op_rcp_impl; + break; + case gpir_op_rsqrt: + impl_op = gpir_op_rsqrt_impl; + break; + default: + assert(0); + } + + gpir_alu_node *impl = gpir_node_create(block, impl_op); + if (unlikely(!impl)) + return false; + + impl->children[0] = child; + impl->num_child = 1; + gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT); + 
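+
+   /* Resulting node tree (sketch): the original rcp/rsqrt node is rewritten
+    * below into complex1(impl, complex2(x), x), i.e. both helper nodes and
+    * the original source x become children of the final complex1 result. */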
list_addtail(&impl->node.list, &node->list); + + /* change node to complex1 node */ + node->op = gpir_op_complex1; + alu->children[0] = &impl->node; + alu->children[1] = &complex2->node; + alu->children[2] = child; + alu->num_child = 3; + gpir_node_add_dep(node, &impl->node, GPIR_DEP_INPUT); + gpir_node_add_dep(node, &complex2->node, GPIR_DEP_INPUT); + + return true; +} + +static bool gpir_lower_node_may_consume_two_slots(gpir_compiler *comp) +{ + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { + if (gpir_op_infos[node->op].may_consume_two_slots) { + /* dummy_f/m are auxiliary nodes for value reg alloc: + * 1. before reg alloc, create fake nodes dummy_f, dummy_m, + * so the tree become: (dummy_m (node dummy_f)) + * dummy_m can be spilled, but other nodes in the tree can't + * be spilled. + * 2. After reg allocation and fake dep add, merge all deps of + * dummy_m and dummy_f to node and remove dummy_m & dummy_f + * + * We may also not use dummy_f/m, but alloc two value reg for + * node. But that means we need to make sure there're 2 free + * slot after the node successors, but we just need one slot + * after to be able to schedule it because we can use one move for + * the two slot node. It's also not easy to handle the spill case + * for the alloc 2 value method. + * + * With the dummy_f/m method, there's no such requirement, the + * node can be scheduled only when there's two slots for it, + * otherwise a move. And the node can be spilled with one reg. + */ + gpir_node *dummy_m = gpir_node_create(block, gpir_op_dummy_m); + if (unlikely(!dummy_m)) + return false; + list_add(&dummy_m->list, &node->list); + + gpir_node *dummy_f = gpir_node_create(block, gpir_op_dummy_f); + if (unlikely(!dummy_f)) + return false; + list_add(&dummy_f->list, &node->list); + + gpir_alu_node *alu = gpir_node_to_alu(dummy_m); + alu->children[0] = node; + alu->children[1] = dummy_f; + alu->num_child = 2; + + gpir_node_replace_succ(dummy_m, node); + gpir_node_add_dep(dummy_m, node, GPIR_DEP_INPUT); + gpir_node_add_dep(dummy_m, dummy_f, GPIR_DEP_INPUT); + + } + } + } + + return true; +} + +/* + * There are no 'equal' or 'not-equal' opcodes. 
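+ * Comparison results here are 0.0/1.0 floats, so min acts as a logical AND
+ * and max as a logical OR (e.g. min(1.0, 0.0) = 0.0 and max(1.0, 0.0) = 1.0):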
+ * eq (a == b) is lowered to and(a >= b, b >= a) + * ne (a != b) is lowered to or(a < b, b < a) + */ +static bool gpir_lower_eq_ne(gpir_block *block, gpir_node *node) +{ + gpir_op cmp_node_op; + gpir_op node_new_op; + switch (node->op) { + case gpir_op_eq: + cmp_node_op = gpir_op_ge; + node_new_op = gpir_op_min; /* and */ + break; + case gpir_op_ne: + cmp_node_op = gpir_op_lt; + node_new_op = gpir_op_max; /* or */ + break; + default: + assert(0); + } + + gpir_alu_node *e = gpir_node_to_alu(node); + + gpir_alu_node *cmp1 = gpir_node_create(block, cmp_node_op); + list_addtail(&cmp1->node.list, &node->list); + gpir_alu_node *cmp2 = gpir_node_create(block, cmp_node_op); + list_addtail(&cmp2->node.list, &node->list); + + cmp1->children[0] = e->children[0]; + cmp1->children[1] = e->children[1]; + cmp1->num_child = 2; + + cmp2->children[0] = e->children[1]; + cmp2->children[1] = e->children[0]; + cmp2->num_child = 2; + + gpir_node_add_dep(&cmp1->node, e->children[0], GPIR_DEP_INPUT); + gpir_node_add_dep(&cmp1->node, e->children[1], GPIR_DEP_INPUT); + + gpir_node_add_dep(&cmp2->node, e->children[0], GPIR_DEP_INPUT); + gpir_node_add_dep(&cmp2->node, e->children[1], GPIR_DEP_INPUT); + + gpir_node_foreach_pred_safe(node, dep) { + gpir_node_remove_dep(node, dep->pred); + } + + gpir_node_add_dep(node, &cmp1->node, GPIR_DEP_INPUT); + gpir_node_add_dep(node, &cmp2->node, GPIR_DEP_INPUT); + + node->op = node_new_op; + e->children[0] = &cmp1->node; + e->children[1] = &cmp2->node; + e->num_child = 2; + + return true; +} + +/* + * There is no 'abs' opcode. + * abs(a) is lowered to max(a, -a) + */ +static bool gpir_lower_abs(gpir_block *block, gpir_node *node) +{ + gpir_alu_node *alu = gpir_node_to_alu(node); + + assert(node->op == gpir_op_abs); + + node->op = gpir_op_max; + + alu->children[1] = alu->children[0]; + alu->children_negate[1] = true; + alu->num_child = 2; + + return true; +} + +/* + * There is no 'not' opcode. 
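+ * The operand is a 0.0/1.0 boolean, so 1 - a flips it
+ * (1 - 0.0 = 1.0, 1 - 1.0 = 0.0):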
+ * not(a) is lowered to add(1, -a) + */ +static bool gpir_lower_not(gpir_block *block, gpir_node *node) +{ + gpir_alu_node *alu = gpir_node_to_alu(node); + + assert(alu->node.op == gpir_op_not); + + node->op = gpir_op_add; + + gpir_node *node_const = gpir_node_create(block, gpir_op_const); + gpir_const_node *c = gpir_node_to_const(node_const); + + assert(c->node.op == gpir_op_const); + + list_addtail(&c->node.list, &node->list); + c->value.f = 1.0f; + gpir_node_add_dep(&alu->node, &c->node, GPIR_DEP_INPUT); + + alu->children_negate[1] = !alu->children_negate[0]; + alu->children[1] = alu->children[0]; + alu->children[0] = &c->node; + alu->num_child = 2; + + return true; +} + + +static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = { + [gpir_op_not] = gpir_lower_not, +}; + +static bool (*gpir_post_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = { + [gpir_op_neg] = gpir_lower_neg, + [gpir_op_rcp] = gpir_lower_complex, + [gpir_op_rsqrt] = gpir_lower_complex, + [gpir_op_eq] = gpir_lower_eq_ne, + [gpir_op_ne] = gpir_lower_eq_ne, + [gpir_op_abs] = gpir_lower_abs, +}; + +bool gpir_pre_rsched_lower_prog(gpir_compiler *comp) +{ + if (!gpir_lower_viewport_transform(comp)) + return false; + + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { + if (gpir_pre_rsched_lower_funcs[node->op] && + !gpir_pre_rsched_lower_funcs[node->op](block, node)) + return false; + } + } + + if (!gpir_lower_const(comp)) + return false; + + if (!gpir_lower_load(comp)) + return false; + + gpir_debug("pre rsched lower prog\n"); + gpir_node_print_prog_seq(comp); + return true; +} + +bool gpir_post_rsched_lower_prog(gpir_compiler *comp) +{ + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { + if (gpir_post_rsched_lower_funcs[node->op] && + !gpir_post_rsched_lower_funcs[node->op](block, node)) + return false; + } + } + + if (!gpir_lower_node_may_consume_two_slots(comp)) + return false; + + gpir_debug("post rsched lower prog\n"); + gpir_node_print_prog_seq(comp); + return true; +} diff --git a/src/gallium/drivers/lima/ir/gp/nir.c b/src/gallium/drivers/lima/ir/gp/nir.c new file mode 100644 index 00000000000..69790024ba0 --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/nir.c @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "util/ralloc.h" +#include "compiler/nir/nir.h" + +#include "gpir.h" +#include "lima_context.h" + + +static inline void *gpir_node_create_ssa(gpir_block *block, gpir_op op, nir_ssa_def *ssa) +{ + int index = ssa->index; + gpir_node *node = gpir_node_create(block, op); + + block->comp->var_nodes[index] = node; + snprintf(node->name, sizeof(node->name), "ssa%d", index); + list_addtail(&node->list, &block->node_list); + return node; +} + +static inline void *gpir_node_create_reg(gpir_block *block, gpir_op op, nir_reg_dest *reg) +{ + int index = reg->reg->index; + gpir_node *node = gpir_node_create(block, op); + gpir_store_node *store = gpir_node_create(block, gpir_op_store_reg); + + snprintf(node->name, sizeof(node->name), "reg%d", index); + + store->child = node; + gpir_node_add_dep(&store->node, node, GPIR_DEP_INPUT); + + list_for_each_entry(gpir_reg, reg, &block->comp->reg_list, list) { + if (reg->index == index) { + store->reg = reg; + list_addtail(&store->reg_link, ®->defs_list); + break; + } + } + + list_addtail(&node->list, &block->node_list); + list_addtail(&store->node.list, &block->node_list); + return node; +} + +static void *gpir_node_create_dest(gpir_block *block, gpir_op op, nir_dest *dest) +{ + if (dest->is_ssa) + return gpir_node_create_ssa(block, op, &dest->ssa); + else + return gpir_node_create_reg(block, op, &dest->reg); +} + +static gpir_node *gpir_node_find(gpir_block *block, gpir_node *succ, nir_src *src) +{ + gpir_node *pred; + + if (src->is_ssa) { + pred = block->comp->var_nodes[src->ssa->index]; + assert(pred); + } + else { + pred = gpir_node_create(block, gpir_op_load_reg); + list_addtail(&pred->list, &succ->list); + + gpir_load_node *load = gpir_node_to_load(pred); + list_for_each_entry(gpir_reg, reg, &block->comp->reg_list, list) { + if (reg->index == src->reg.reg->index) { + load->reg = reg; + list_addtail(&load->reg_link, ®->uses_list); + break; + } + } + } + + return pred; +} + +static int nir_to_gpir_opcodes[nir_num_opcodes] = { + /* not supported */ + [0 ... 
nir_last_opcode] = -1, + + [nir_op_fmul] = gpir_op_mul, + [nir_op_fadd] = gpir_op_add, + [nir_op_fneg] = gpir_op_neg, + [nir_op_fnot] = gpir_op_not, + [nir_op_fmin] = gpir_op_min, + [nir_op_fmax] = gpir_op_max, + [nir_op_frcp] = gpir_op_rcp, + [nir_op_frsq] = gpir_op_rsqrt, + [nir_op_slt] = gpir_op_lt, + [nir_op_sge] = gpir_op_ge, + [nir_op_bcsel] = gpir_op_select, + [nir_op_ffloor] = gpir_op_floor, + [nir_op_fsign] = gpir_op_sign, + [nir_op_seq] = gpir_op_eq, + [nir_op_sne] = gpir_op_ne, + [nir_op_fand] = gpir_op_min, + [nir_op_for] = gpir_op_max, + [nir_op_fabs] = gpir_op_abs, +}; + +static bool gpir_emit_alu(gpir_block *block, nir_instr *ni) +{ + nir_alu_instr *instr = nir_instr_as_alu(ni); + int op = nir_to_gpir_opcodes[instr->op]; + + if (op < 0) { + gpir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name); + return false; + } + + gpir_alu_node *node = gpir_node_create_dest(block, op, &instr->dest.dest); + if (unlikely(!node)) + return false; + + unsigned num_child = nir_op_infos[instr->op].num_inputs; + assert(num_child <= ARRAY_SIZE(node->children)); + node->num_child = num_child; + + for (int i = 0; i < num_child; i++) { + nir_alu_src *src = instr->src + i; + node->children_negate[i] = src->negate; + + gpir_node *child = gpir_node_find(block, &node->node, &src->src); + node->children[i] = child; + + gpir_node_add_dep(&node->node, child, GPIR_DEP_INPUT); + } + + return true; +} + +static bool gpir_emit_intrinsic(gpir_block *block, nir_instr *ni) +{ + nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni); + + switch (instr->intrinsic) { + case nir_intrinsic_load_input: + { + gpir_load_node *load = + gpir_node_create_dest(block, gpir_op_load_attribute, &instr->dest); + if (unlikely(!load)) + return false; + + load->index = nir_intrinsic_base(instr); + load->component = nir_intrinsic_component(instr); + + return true; + } + case nir_intrinsic_load_uniform: + { + gpir_load_node *load = + gpir_node_create_dest(block, gpir_op_load_uniform, &instr->dest); + if (unlikely(!load)) + return false; + + int offset = nir_intrinsic_base(instr); + + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + assert(const_offset); + offset += (int)const_offset->f32[0]; + + load->index = offset / 4; + load->component = offset % 4; + + return true; + } + case nir_intrinsic_store_output: + { + gpir_store_node *store = gpir_node_create(block, gpir_op_store_varying); + if (unlikely(!store)) + return false; + list_addtail(&store->node.list, &block->node_list); + + store->index = nir_intrinsic_base(instr); + store->component = nir_intrinsic_component(instr); + + gpir_node *child = gpir_node_find(block, &store->node, instr->src); + store->child = child; + gpir_node_add_dep(&store->node, child, GPIR_DEP_INPUT); + + return true; + } + default: + gpir_error("unsupported nir_intrinsic_instr %d\n", instr->intrinsic); + return false; + } +} + +static bool gpir_emit_load_const(gpir_block *block, nir_instr *ni) +{ + nir_load_const_instr *instr = nir_instr_as_load_const(ni); + gpir_const_node *node = + gpir_node_create_ssa(block, gpir_op_const, &instr->def); + if (unlikely(!node)) + return false; + + assert(instr->def.bit_size == 32); + assert(instr->def.num_components == 1); + + node->value.i = instr->value.i32[0]; + + return true; +} + +static bool gpir_emit_ssa_undef(gpir_block *block, nir_instr *ni) +{ + gpir_error("nir_ssa_undef_instr not support\n"); + return false; +} + +static bool gpir_emit_tex(gpir_block *block, nir_instr *ni) +{ + gpir_error("nir_jump_instr not support\n"); + 
return false; +} + +static bool gpir_emit_jump(gpir_block *block, nir_instr *ni) +{ + gpir_error("nir_jump_instr not support\n"); + return false; +} + +static bool (*gpir_emit_instr[nir_instr_type_phi])(gpir_block *, nir_instr *) = { + [nir_instr_type_alu] = gpir_emit_alu, + [nir_instr_type_intrinsic] = gpir_emit_intrinsic, + [nir_instr_type_load_const] = gpir_emit_load_const, + [nir_instr_type_ssa_undef] = gpir_emit_ssa_undef, + [nir_instr_type_tex] = gpir_emit_tex, + [nir_instr_type_jump] = gpir_emit_jump, +}; + +static gpir_block *gpir_block_create(gpir_compiler *comp) +{ + gpir_block *block = ralloc(comp, gpir_block); + if (!block) + return NULL; + + list_inithead(&block->node_list); + list_inithead(&block->instr_list); + + return block; +} + +static bool gpir_emit_block(gpir_compiler *comp, nir_block *nblock) +{ + gpir_block *block = gpir_block_create(comp); + if (!block) + return false; + + list_addtail(&block->list, &comp->block_list); + block->comp = comp; + + nir_foreach_instr(instr, nblock) { + assert(instr->type < nir_instr_type_phi); + if (!gpir_emit_instr[instr->type](block, instr)) + return false; + } + + return true; +} + +static bool gpir_emit_if(gpir_compiler *comp, nir_if *nif) +{ + gpir_error("if nir_cf_node not support\n"); + return false; +} + +static bool gpir_emit_loop(gpir_compiler *comp, nir_loop *nloop) +{ + gpir_error("loop nir_cf_node not support\n"); + return false; +} + +static bool gpir_emit_function(gpir_compiler *comp, nir_function_impl *nfunc) +{ + gpir_error("function nir_cf_node not support\n"); + return false; +} + +static bool gpir_emit_cf_list(gpir_compiler *comp, struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, node, node, list) { + bool ret; + + switch (node->type) { + case nir_cf_node_block: + ret = gpir_emit_block(comp, nir_cf_node_as_block(node)); + break; + case nir_cf_node_if: + ret = gpir_emit_if(comp, nir_cf_node_as_if(node)); + break; + case nir_cf_node_loop: + ret = gpir_emit_loop(comp, nir_cf_node_as_loop(node)); + break; + case nir_cf_node_function: + ret = gpir_emit_function(comp, nir_cf_node_as_function(node)); + break; + default: + gpir_error("unknown NIR node type %d\n", node->type); + return false; + } + + if (!ret) + return false; + } + + return true; +} + +gpir_reg *gpir_create_reg(gpir_compiler *comp) +{ + gpir_reg *reg = ralloc(comp, gpir_reg); + reg->index = comp->cur_reg++; + list_addtail(®->list, &comp->reg_list); + list_inithead(®->defs_list); + list_inithead(®->uses_list); + return reg; +} + +static gpir_compiler *gpir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa) +{ + gpir_compiler *comp = rzalloc(prog, gpir_compiler); + + list_inithead(&comp->block_list); + list_inithead(&comp->reg_list); + + for (int i = 0; i < num_reg; i++) + gpir_create_reg(comp); + + comp->var_nodes = rzalloc_array(comp, gpir_node *, num_ssa); + comp->prog = prog; + return comp; +} + +static int gpir_glsl_type_size(enum glsl_base_type type) +{ + /* only support GLSL_TYPE_FLOAT */ + assert(type == GLSL_TYPE_FLOAT); + return 4; +} + +bool gpir_compile_nir(struct lima_vs_shader_state *prog, struct nir_shader *nir) +{ + nir_function_impl *func = nir_shader_get_entrypoint(nir); + gpir_compiler *comp = gpir_compiler_create(prog, func->reg_alloc, func->ssa_alloc); + if (!comp) + return false; + + comp->constant_base = nir->num_uniforms; + prog->uniform_pending_offset = nir->num_uniforms * 16; + + if (!gpir_emit_cf_list(comp, &func->body)) + goto err_out0; + + gpir_node_print_prog_seq(comp); + gpir_node_print_prog_dep(comp); + + 
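+
+   /* Backend pipeline, in the order invoked below: pre-rsched lowering,
+    * register-pressure-reducing scheduling, post-rsched lowering, value
+    * register allocation, physical register allocation, final instruction
+    * scheduling, then code generation. */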
if (!gpir_pre_rsched_lower_prog(comp)) + goto err_out0; + + if (!gpir_reduce_reg_pressure_schedule_prog(comp)) + goto err_out0; + + if (!gpir_post_rsched_lower_prog(comp)) + goto err_out0; + + if (!gpir_value_regalloc_prog(comp)) + goto err_out0; + + if (!gpir_physical_regalloc_prog(comp)) + goto err_out0; + + if (!gpir_schedule_prog(comp)) + goto err_out0; + + if (!gpir_codegen_prog(comp)) + goto err_out0; + + nir_foreach_variable(var, &nir->outputs) { + if (var->data.location == VARYING_SLOT_POS) + assert(var->data.driver_location == 0); + + struct lima_varying_info *v = prog->varying + var->data.driver_location; + if (!v->components) { + v->component_size = gpir_glsl_type_size(glsl_get_base_type(var->type)); + prog->num_varying++; + } + + v->components += glsl_get_components(var->type); + } + + ralloc_free(comp); + return true; + +err_out0: + ralloc_free(comp); + return false; +} + diff --git a/src/gallium/drivers/lima/ir/gp/node.c b/src/gallium/drivers/lima/ir/gp/node.c new file mode 100644 index 00000000000..e6287db713d --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/node.c @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "util/u_math.h" +#include "util/ralloc.h" + +#include "gpir.h" + +const gpir_op_info gpir_op_infos[] = { + [gpir_op_mov] = { + .name = "mov", + .slots = (int []) { + GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1, + GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0, + GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_COMPLEX, + GPIR_INSTR_SLOT_END + }, + }, + [gpir_op_mul] = { + .name = "mul", + .dest_neg = true, + .slots = (int []) { GPIR_INSTR_SLOT_MUL1, GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END }, + }, + [gpir_op_select] = { + .name = "select", + .dest_neg = true, + .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END }, + .may_consume_two_slots = true, + }, + [gpir_op_complex1] = { + .name = "complex1", + .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END }, + .spillless = true, + .may_consume_two_slots = true, + }, + [gpir_op_complex2] = { + .name = "complex2", + .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END }, + .spillless = true, + }, + [gpir_op_add] = { + .name = "add", + .src_neg = {true, true, false, false}, + .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, + }, + [gpir_op_floor] = { + .name = "floor", + .src_neg = {true, false, false, false}, + .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, + }, + [gpir_op_sign] = { + .name = "sign", + .src_neg = {true, false, false, false}, + .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, + }, + [gpir_op_ge] = { + .name = "ge", + .src_neg = {true, true, false, false}, + .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, + }, + [gpir_op_lt] = { + .name = "lt", + .src_neg = {true, true, false, false}, + .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, + }, + [gpir_op_min] = { + .name = "min", + .src_neg = {true, true, false, false}, + .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, + .spillless = true, + .may_consume_two_slots = true, + }, + [gpir_op_max] = { + .name = "max", + .src_neg = {true, true, false, false}, + .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, + .spillless = true, + .may_consume_two_slots = true, + }, + [gpir_op_abs] = { + .name = "abs", + .src_neg = {true, true, false, false}, + }, + [gpir_op_neg] = { + .name = "neg", + .slots = (int []) { + GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1, + GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0, + GPIR_INSTR_SLOT_END + }, + }, + [gpir_op_not] = { + .name = "not", + .src_neg = {true, true, false, false}, + .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, + }, + [gpir_op_eq] = { + .name = "eq", + .slots = (int []) { + GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END + }, + }, + [gpir_op_ne] = { + .name = "ne", + .slots = (int []) { + GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END + }, + }, + [gpir_op_clamp_const] = { + .name = "clamp_const", + }, + [gpir_op_preexp2] = { + .name = "preexp2", + }, + [gpir_op_postlog2] = { + .name = "postlog2", + }, + [gpir_op_exp2_impl] = { + .name = "exp2_impl", + }, + [gpir_op_log2_impl] = { + .name = "log2_impl", + }, + [gpir_op_rcp_impl] = { + .name = "rcp_impl", + .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END }, + .spillless = true, + }, + [gpir_op_rsqrt_impl] = { + .name = "rsqrt_impl", + .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END }, + .spillless 
= true, + }, + [gpir_op_load_uniform] = { + .name = "ld_uni", + .slots = (int []) { + GPIR_INSTR_SLOT_MEM_LOAD0, GPIR_INSTR_SLOT_MEM_LOAD1, + GPIR_INSTR_SLOT_MEM_LOAD2, GPIR_INSTR_SLOT_MEM_LOAD3, + GPIR_INSTR_SLOT_END + }, + .type = gpir_node_type_load, + }, + [gpir_op_load_temp] = { + .name = "ld_tmp", + .type = gpir_node_type_load, + }, + [gpir_op_load_attribute] = { + .name = "ld_att", + .slots = (int []) { + GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1, + GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3, + GPIR_INSTR_SLOT_END + }, + .type = gpir_node_type_load, + }, + [gpir_op_load_reg] = { + .name = "ld_reg", + .slots = (int []) { + GPIR_INSTR_SLOT_REG1_LOAD0, GPIR_INSTR_SLOT_REG1_LOAD1, + GPIR_INSTR_SLOT_REG1_LOAD2, GPIR_INSTR_SLOT_REG1_LOAD3, + GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1, + GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3, + GPIR_INSTR_SLOT_END + }, + .type = gpir_node_type_load, + .spillless = true, + }, + [gpir_op_store_temp] = { + .name = "st_tmp", + .type = gpir_node_type_store, + }, + [gpir_op_store_reg] = { + .name = "st_reg", + .slots = (int []) { + GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1, + GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3, + GPIR_INSTR_SLOT_END + }, + .type = gpir_node_type_store, + .spillless = true, + }, + [gpir_op_store_varying] = { + .name = "st_var", + .slots = (int []) { + GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1, + GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3, + GPIR_INSTR_SLOT_END + }, + .type = gpir_node_type_store, + .spillless = true, + }, + [gpir_op_store_temp_load_off0] = { + .name = "st_of0", + .type = gpir_node_type_store, + }, + [gpir_op_store_temp_load_off1] = { + .name = "st_of1", + .type = gpir_node_type_store, + }, + [gpir_op_store_temp_load_off2] = { + .name = "st_of2", + .type = gpir_node_type_store, + }, + [gpir_op_branch_cond] = { + .name = "branch_cond", + .type = gpir_node_type_branch, + }, + [gpir_op_const] = { + .name = "const", + .type = gpir_node_type_const, + }, + [gpir_op_exp2] = { + .name = "exp2", + }, + [gpir_op_log2] = { + .name = "log2", + }, + [gpir_op_rcp] = { + .name = "rcp", + }, + [gpir_op_rsqrt] = { + .name = "rsqrt", + }, + [gpir_op_ceil] = { + .name = "ceil", + }, + [gpir_op_exp] = { + .name = "exp", + }, + [gpir_op_log] = { + .name = "log", + }, + [gpir_op_sin] = { + .name = "sin", + }, + [gpir_op_cos] = { + .name = "cos", + }, + [gpir_op_tan] = { + .name = "tan", + }, + [gpir_op_dummy_f] = { + .name = "dummy_f", + .type = gpir_node_type_alu, + .spillless = true, + }, + [gpir_op_dummy_m] = { + .name = "dummy_m", + .type = gpir_node_type_alu, + }, + [gpir_op_branch_uncond] = { + .name = "branch_uncond", + .type = gpir_node_type_branch, + }, +}; + +void *gpir_node_create(gpir_block *block, gpir_op op) +{ + static const int node_size[] = { + [gpir_node_type_alu] = sizeof(gpir_alu_node), + [gpir_node_type_const] = sizeof(gpir_const_node), + [gpir_node_type_load] = sizeof(gpir_load_node), + [gpir_node_type_store] = sizeof(gpir_store_node), + [gpir_node_type_branch] = sizeof(gpir_branch_node), + }; + + gpir_node_type type = gpir_op_infos[op].type; + int size = node_size[type]; + gpir_node *node = rzalloc_size(block, size); + if (unlikely(!node)) + return NULL; + + snprintf(node->name, sizeof(node->name), "new"); + + list_inithead(&node->succ_list); + list_inithead(&node->pred_list); + + node->op = op; + node->type = type; + node->index = block->comp->cur_index++; + node->block = block; + + return node; +} + +gpir_dep *gpir_node_add_dep(gpir_node *succ, 
gpir_node *pred, int type) +{ + /* don't add dep for two nodes from different block */ + if (succ->block != pred->block) + return NULL; + + /* don't add self loop dep */ + if (succ == pred) + return NULL; + + /* don't add duplicated dep */ + gpir_node_foreach_pred(succ, dep) { + if (dep->pred == pred) { + /* use stronger dependency */ + if (dep->type > type) + dep->type = type; + return dep; + } + } + + gpir_dep *dep = ralloc(succ, gpir_dep); + dep->type = type; + dep->pred = pred; + dep->succ = succ; + list_addtail(&dep->pred_link, &succ->pred_list); + list_addtail(&dep->succ_link, &pred->succ_list); + return dep; +} + +void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred) +{ + gpir_node_foreach_pred(succ, dep) { + if (dep->pred == pred) { + list_del(&dep->succ_link); + list_del(&dep->pred_link); + ralloc_free(dep); + return; + } + } +} + +void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, + gpir_node *new_child) +{ + if (parent->type == gpir_node_type_alu) { + gpir_alu_node *alu = gpir_node_to_alu(parent); + for (int i = 0; i < alu->num_child; i++) { + if (alu->children[i] == old_child) + alu->children[i] = new_child; + } + } + else if (parent->type == gpir_node_type_store) { + gpir_store_node *store = gpir_node_to_store(parent); + if (store->child == old_child) + store->child = new_child; + } +} + +void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred) +{ + list_del(&dep->succ_link); + dep->pred = new_pred; + list_addtail(&dep->succ_link, &new_pred->succ_list); +} + +void gpir_node_replace_succ(gpir_node *dst, gpir_node *src) +{ + gpir_node_foreach_succ_safe(src, dep) { + if (dep->type != GPIR_DEP_INPUT) + continue; + + gpir_node_replace_pred(dep, dst); + gpir_node_replace_child(dep->succ, src, dst); + } +} + +void gpir_node_insert_child(gpir_node *parent, gpir_node *child, + gpir_node *insert_child) +{ + gpir_node_foreach_pred(parent, dep) { + if (dep->pred == child) { + gpir_node_replace_pred(dep, insert_child); + break; + } + } + gpir_node_add_dep(insert_child, child, GPIR_DEP_INPUT); +} + +void gpir_node_delete(gpir_node *node) +{ + gpir_node_foreach_succ_safe(node, dep) { + list_del(&dep->succ_link); + list_del(&dep->pred_link); + ralloc_free(dep); + } + + gpir_node_foreach_pred_safe(node, dep) { + list_del(&dep->succ_link); + list_del(&dep->pred_link); + ralloc_free(dep); + } + + if (node->type == gpir_node_type_store) { + gpir_store_node *store = gpir_node_to_store(node); + if (store->reg) + list_del(&store->reg_link); + } + else if (node->type == gpir_node_type_load) { + gpir_load_node *load = gpir_node_to_load(node); + if (load->reg) + list_del(&load->reg_link); + } + + list_del(&node->list); + ralloc_free(node); +} + +static void gpir_node_print_node(gpir_node *node, int type, int space) +{ + static char *dep_name[] = { + [GPIR_DEP_INPUT] = "input", + [GPIR_DEP_OFFSET] = "offset", + [GPIR_DEP_READ_AFTER_WRITE] = "RaW", + [GPIR_DEP_WRITE_AFTER_READ] = "WaR", + [GPIR_DEP_VREG_READ_AFTER_WRITE] = "vRaW", + [GPIR_DEP_VREG_WRITE_AFTER_READ] = "vWaR", + }; + + for (int i = 0; i < space; i++) + printf(" "); + printf("%s%s %d %s %s\n", node->printed && !gpir_node_is_leaf(node) ? 
"+" : "", + gpir_op_infos[node->op].name, node->index, node->name, dep_name[type]); + + if (!node->printed) { + gpir_node_foreach_pred(node, dep) { + gpir_node_print_node(dep->pred, dep->type, space + 2); + } + + node->printed = true; + } +} + +void gpir_node_print_prog_dep(gpir_compiler *comp) +{ + if (!(lima_debug & LIMA_DEBUG_GP)) + return; + + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry(gpir_node, node, &block->node_list, list) { + node->printed = false; + } + } + + printf("======== node prog dep ========\n"); + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry(gpir_node, node, &block->node_list, list) { + if (gpir_node_is_root(node)) + gpir_node_print_node(node, GPIR_DEP_INPUT, 0); + } + printf("----------------------------\n"); + } +} + +void gpir_node_print_prog_seq(gpir_compiler *comp) +{ + if (!(lima_debug & LIMA_DEBUG_GP)) + return; + + int index = 0; + printf("======== node prog seq ========\n"); + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry(gpir_node, node, &block->node_list, list) { + printf("%03d: %s %d %s pred", index++, gpir_op_infos[node->op].name, + node->index, node->name); + gpir_node_foreach_pred(node, dep) { + printf(" %d", dep->pred->index); + } + printf(" succ"); + gpir_node_foreach_succ(node, dep) { + printf(" %d", dep->succ->index); + } + printf("\n"); + } + printf("----------------------------\n"); + } +} diff --git a/src/gallium/drivers/lima/ir/gp/physical_regalloc.c b/src/gallium/drivers/lima/ir/gp/physical_regalloc.c new file mode 100644 index 00000000000..87d88a8f9b7 --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/physical_regalloc.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+#include <limits.h>
+
+#include "gpir.h"
+
+/* Linear scan register alloc for physical reg alloc of each
+ * load/store node
+ */
+
+static void regalloc_print_result(gpir_compiler *comp)
+{
+   if (!(lima_debug & LIMA_DEBUG_GP))
+      return;
+
+   int index = 0;
+   printf("======== physical regalloc ========\n");
+   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
+      list_for_each_entry(gpir_node, node, &block->node_list, list) {
+         if (node->op == gpir_op_load_reg) {
+            gpir_load_node *load = gpir_node_to_load(node);
+            printf("%03d: load %d use reg %d\n", index, node->index, load->reg->index);
+         }
+         else if (node->op == gpir_op_store_reg) {
+            gpir_store_node *store = gpir_node_to_store(node);
+            printf("%03d: store %d use reg %d\n", index, node->index, store->reg->index);
+         }
+         index++;
+      }
+      printf("----------------------------\n");
+   }
+}
+
+bool gpir_physical_regalloc_prog(gpir_compiler *comp)
+{
+   int index = 0;
+   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
+      list_for_each_entry(gpir_node, node, &block->node_list, list) {
+         node->preg.index = index++;
+      }
+   }
+
+   /* calculate each reg liveness interval */
+   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
+      reg->start = INT_MAX;
+      list_for_each_entry(gpir_store_node, store, &reg->defs_list, reg_link) {
+         if (store->node.preg.index < reg->start)
+            reg->start = store->node.preg.index;
+      }
+
+      reg->end = 0;
+      list_for_each_entry(gpir_load_node, load, &reg->uses_list, reg_link) {
+         if (load->node.preg.index > reg->end)
+            reg->end = load->node.preg.index;
+      }
+   }
+
+   /* sort reg list by start value */
+   struct list_head reg_list;
+   list_replace(&comp->reg_list, &reg_list);
+   list_inithead(&comp->reg_list);
+   list_for_each_entry_safe(gpir_reg, reg, &reg_list, list) {
+      struct list_head *insert_pos = &comp->reg_list;
+      list_for_each_entry(gpir_reg, creg, &comp->reg_list, list) {
+         if (creg->start > reg->start) {
+            insert_pos = &creg->list;
+            break;
+         }
+      }
+      list_del(&reg->list);
+      list_addtail(&reg->list, insert_pos);
+   }
+
+   /* do linear scan reg alloc */
+   gpir_reg *active[GPIR_PHYSICAL_REG_NUM] = {0};
+   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
+      int i;
+
+      /* if some reg is expired */
+      for (i = 0; i < GPIR_PHYSICAL_REG_NUM; i++) {
+         if (active[i] && active[i]->end <= reg->start)
+            active[i] = NULL;
+      }
+
+      /* find a free reg value for this reg */
+      for (i = 0; i < GPIR_PHYSICAL_REG_NUM; i++) {
+         if (!active[i]) {
+            active[i] = reg;
+            reg->index = i;
+            break;
+         }
+      }
+
+      /* TODO: support spill to temp memory */
+      assert(i < GPIR_PHYSICAL_REG_NUM);
+   }
+
+   /* update load/store node info for the real reg */
+   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
+      list_for_each_entry(gpir_store_node, store, &reg->defs_list, reg_link) {
+         store->index = reg->index >> 2;
+         store->component = reg->index % 4;
+      }
+
+      list_for_each_entry(gpir_load_node, load, &reg->uses_list, reg_link) {
+         load->index = reg->index >> 2;
+         load->component = reg->index % 4;
+      }
+   }
+
+   regalloc_print_result(comp);
+   return true;
+} diff --git a/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c b/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c new file mode 100644 index 00000000000..f20768e12e4 --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c @@ -0,0 +1,220 @@ +/*
+ * Copyright (c) 2017 Lima Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction,
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include + +#include "gpir.h" + +/* Register sensitive schedule algorithm from paper: + * "Register-Sensitive Selection, Duplication, and Sequencing of Instructions" + * Author: Vivek Sarkar, Mauricio J. Serrano, Barbara B. Simons + */ + +static void schedule_calc_sched_info(gpir_node *node) +{ + int n = 0; + float extra_reg = 1.0f; + + /* update all children's sched info */ + gpir_node_foreach_pred(node, dep) { + gpir_node *pred = dep->pred; + + if (pred->rsched.reg_pressure < 0) + schedule_calc_sched_info(pred); + + int est = pred->rsched.est + 1; + if (node->rsched.est < est) + node->rsched.est = est; + + float reg_weight = 1.0f - 1.0f / list_length(&pred->succ_list); + if (extra_reg > reg_weight) + extra_reg = reg_weight; + + n++; + } + + /* leaf instr */ + if (!n) { + node->rsched.reg_pressure = 0; + return; + } + + int i = 0; + float reg[n]; + gpir_node_foreach_pred(node, dep) { + gpir_node *pred = dep->pred; + reg[i++] = pred->rsched.reg_pressure; + } + + /* sort */ + for (i = 0; i < n - 1; i++) { + for (int j = 0; j < n - i - 1; j++) { + if (reg[j] > reg[j + 1]) { + float tmp = reg[j + 1]; + reg[j + 1] = reg[j]; + reg[j] = tmp; + } + } + } + + for (i = 0; i < n; i++) { + float pressure = reg[i] + n - (i + 1); + if (pressure > node->rsched.reg_pressure) + node->rsched.reg_pressure = pressure; + } + + /* If all children of this node have multi parents, then this + * node need an extra reg to store its result. For example, + * it's not fair for parent has the same reg pressure as child + * if n==1 and child's successor>1, because we need 2 reg for + * this. + * + * But we can't add a full reg to the reg_pressure, because the + * last parent of a multi-successor child doesn't need an extra + * reg. For example, a single child (with multi successor) node + * should has less reg pressure than a two children (with single + * successor) instr. 
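The fractional "extra reg" term above is easiest to see with concrete numbers. A minimal sketch of the labeling rule, max over ascending-sorted children of reg[i] + n - (i + 1), plus min over children of 1 - 1/num_successors; struct child, label() and the sample values are hypothetical, used only for this illustration:

#include <stdio.h>

struct child { float pressure; int num_succ; };

static float label(const struct child *c, int n)
{
   /* leaf nodes get pressure 0, as in the pass; c[] is assumed sorted
    * by ascending pressure, as the pass sorts */
   if (n == 0)
      return 0.0f;

   float p = 0.0f, extra = 1.0f;
   for (int i = 0; i < n; i++) {
      float cand = c[i].pressure + (float)(n - (i + 1));
      if (cand > p)
         p = cand;

      float w = 1.0f - 1.0f / c[i].num_succ;
      if (w < extra)
         extra = w;
   }
   return p + extra;
}

int main(void)
{
   /* two single-use children with labels 1 and 2 -> 2.0 */
   struct child a[] = { {1, 1}, {2, 1} };
   /* one child with label 1 but two consumers -> 1.5: the extra half
    * register accounts for the value staying live for its other use */
   struct child b[] = { {1, 2} };
   printf("%.1f %.1f\n", label(a, 2), label(b, 1));
   return 0;
}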
+ * + * extra reg = min(all child)(1.0 - 1.0 / num successor) + */ + node->rsched.reg_pressure += extra_reg; +} + +static void schedule_insert_ready_list(struct list_head *ready_list, + gpir_node *insert_node) +{ + struct list_head *insert_pos = ready_list; + + list_for_each_entry(gpir_node, node, ready_list, list) { + if (insert_node->rsched.parent_index < node->rsched.parent_index || + (insert_node->rsched.parent_index == node->rsched.parent_index && + (insert_node->rsched.reg_pressure < node->rsched.reg_pressure || + (insert_node->rsched.reg_pressure == node->rsched.reg_pressure && + (insert_node->rsched.est >= node->rsched.est))))) { + insert_pos = &node->list; + break; + } + } + + list_del(&insert_node->list); + list_addtail(&insert_node->list, insert_pos); +} + +static void schedule_ready_list(gpir_block *block, struct list_head *ready_list) +{ + if (list_empty(ready_list)) + return; + + gpir_node *node = list_first_entry(ready_list, gpir_node, list); + list_del(&node->list); + + /* schedule the node to the block node list */ + list_add(&node->list, &block->node_list); + node->rsched.scheduled = true; + block->rsched.node_index--; + + gpir_node_foreach_pred(node, dep) { + gpir_node *pred = dep->pred; + pred->rsched.parent_index = block->rsched.node_index; + + bool ready = true; + gpir_node_foreach_succ(pred, dep) { + gpir_node *succ = dep->succ; + if (!succ->rsched.scheduled) { + ready = false; + break; + } + } + /* all successor have been scheduled */ + if (ready) + schedule_insert_ready_list(ready_list, pred); + } + + schedule_ready_list(block, ready_list); +} + +static void schedule_block(gpir_block *block) +{ + /* move all nodes to node_list, block->node_list will + * contain schedule result */ + struct list_head node_list; + list_replace(&block->node_list, &node_list); + list_inithead(&block->node_list); + + /* step 2 & 3 */ + list_for_each_entry(gpir_node, node, &node_list, list) { + if (gpir_node_is_root(node)) + schedule_calc_sched_info(node); + block->rsched.node_index++; + } + + /* step 4 */ + struct list_head ready_list; + list_inithead(&ready_list); + + /* step 5 */ + list_for_each_entry_safe(gpir_node, node, &node_list, list) { + if (gpir_node_is_root(node)) { + node->rsched.parent_index = INT_MAX; + schedule_insert_ready_list(&ready_list, node); + } + } + + /* step 6 */ + schedule_ready_list(block, &ready_list); +} + +bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp) +{ + /* No need to build physical reg load/store dependency here, + * because we just exit SSA form, there should be at most + * one load and one store pair for a physical reg within a + * block, and the store must be after load with the output + * of load as input after some calculation. So we don't need to + * insert extra write-after-read or read-after-write dependecy + * for load/store nodes to maintain the right sequence before + * scheduling. + * + * Also no need to handle SSA def/use in difference block, + * because we'll load/store SSA to a physical reg if def/use + * are not in the same block. 
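Spelled out, the insertion test above keeps the ready list sorted by ascending parent_index, then ascending reg_pressure, then descending est, and the scheduler always pops the head, i.e. it prefers operands of the most recently scheduled node, breaking ties toward lower register pressure and a deeper critical path. A sketch of the same ordering as a plain comparator; struct rsched_key and rsched_before() are hypothetical names for this illustration:

#include <stdbool.h>

struct rsched_key { int parent_index; float reg_pressure; int est; };

/* returns true if 'a' should be scheduled before 'b', mirroring the
 * insertion condition in schedule_insert_ready_list() above */
static bool rsched_before(const struct rsched_key *a, const struct rsched_key *b)
{
   if (a->parent_index != b->parent_index)
      return a->parent_index < b->parent_index;
   if (a->reg_pressure != b->reg_pressure)
      return a->reg_pressure < b->reg_pressure;
   return a->est >= b->est;
}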
+ */ + + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + block->rsched.node_index = 0; + list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { + node->rsched.reg_pressure = -1; + node->rsched.est = 0; + node->rsched.scheduled = false; + } + } + + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + schedule_block(block); + } + + gpir_debug("after reduce scheduler\n"); + gpir_node_print_prog_seq(comp); + return true; +} diff --git a/src/gallium/drivers/lima/ir/gp/scheduler.c b/src/gallium/drivers/lima/ir/gp/scheduler.c new file mode 100644 index 00000000000..8dbec242a7a --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/scheduler.c @@ -0,0 +1,809 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include + +#include "gpir.h" + +/* + * GP schedule algorithm (by Connor Abbott ) + * + * Pre schedule phase: + * 1. order all nodes in a sequence + * 2. convert the real reg read/write to GP load/store node, now all + * variable is SSA + * 3. do reg alloc for all SSA with 11 reg (value reg) and spill with + * load/store to real reg if needed + * 4. add fake dependency like this: + * after step 3, node sequence is + * 01: r1=r2+r3 + * 02: r4=r1+r2 + * 03: r1=r5+r6 + * we should add a fake dependency of node 3 to node 2 like a + * write-after-read dep. But this is not really write-after-read + * dep because there's no r1 really, because it's a value register. + * We need this fake dep in the schedule phase to make sure in any + * schedule point, there're only <=11 input needed by the past + * scheduled nodes. + * 5. build DAG according to all the real and fake dep + * + * Schedule phase: + * 1. Compute the nodes ready to schedule, if no nodes, exit + * 2. Create a new GP instruction, and call it as current instr + * 3. For any nodes with a use 2 cycles ago with a definition ready to + * schedule, schedule that definition immediately if possible, or else + * schedule a move. + * 4. For any nodes with a use 2 cycles ago but the definition not + * scheduled and not ready to schedule, schedule a move immediately + * to prevent the value from falling off the queue. + * 5. Calculate the number of remaining nodes with a use 1 cycle ago but + * the definition not yet scheduled, and if there are more than 5, + * schedule moves or definitions for the rest now. + * 6. 
Schedule the rest of the available nodes using your favorite heuristic + * to current instr. + * 7. go to step 1 + * + * Step 5 for the current instruction guarantees that steps 3 and 4 for + * the next instruction will always succeed, so it's only step 5 that can + * possibly fail. Now, note that the nodes whose definitions have not yet + * been scheduled but one or more use has been scheduled, are exactly the + * nodes that are live in the final schedule. Therefore there will never + * be more than 11 of them (guarenteed by the 11 value reg alloc and the + * fake dep added before schedule). The worst case for step 5 is that all of + * these nodes had a use 1 cycle ago, which means that none of them hit + * case 3 or 4 already, so there are 6 slots still available so step 5 + * will always succeed. In general, even if there are exactly 11 values + * live, if n are scheduled in steps 3 and 4, there are 11-n left in step + * 4 so at most 11-n-5 = 6-n are scheduled in step 5 and therefore 6 are + * scheduled total, below the limit. So the algorithm will always succeed. + */ + +static int gpir_min_dist_alu(gpir_dep *dep) +{ + switch (dep->pred->op) { + case gpir_op_load_uniform: + case gpir_op_load_temp: + case gpir_op_load_reg: + case gpir_op_load_attribute: + return 0; + + case gpir_op_complex1: + return 2; + + default: + return 1; + } +} + +static int gpir_get_min_dist(gpir_dep *dep) +{ + switch (dep->type) { + case GPIR_DEP_INPUT: + switch (dep->succ->op) { + case gpir_op_store_temp: + case gpir_op_store_reg: + case gpir_op_store_varying: + /* store must use alu node as input */ + if (dep->pred->type == gpir_node_type_load) + return INT_MAX >> 2; + else + return 0; + + default: + return gpir_min_dist_alu(dep); + } + + case GPIR_DEP_OFFSET: + assert(dep->succ->op == gpir_op_store_temp); + return gpir_min_dist_alu(dep); + + case GPIR_DEP_READ_AFTER_WRITE: + switch (dep->succ->op) { + case gpir_op_load_temp: + assert(dep->pred->op == gpir_op_store_temp); + return 4; + case gpir_op_load_reg: + assert(dep->pred->op == gpir_op_store_reg); + return 3; + case gpir_op_load_uniform: + assert(dep->pred->op == gpir_op_store_temp_load_off0 || + dep->pred->op == gpir_op_store_temp_load_off1 || + dep->pred->op == gpir_op_store_temp_load_off2); + return 4; + default: + assert(0); + } + + case GPIR_DEP_WRITE_AFTER_READ: + switch (dep->pred->op) { + case gpir_op_load_temp: + assert(dep->succ->op == gpir_op_store_temp); + return -3; + case gpir_op_load_reg: + assert(dep->succ->op == gpir_op_store_reg); + return -2; + case gpir_op_load_uniform: + assert(dep->succ->op == gpir_op_store_temp_load_off0 || + dep->succ->op == gpir_op_store_temp_load_off1 || + dep->succ->op == gpir_op_store_temp_load_off2); + return -3; + default: + assert(0); + } + + case GPIR_DEP_VREG_WRITE_AFTER_READ: + return 0; + + case GPIR_DEP_VREG_READ_AFTER_WRITE: + assert(0); /* not possible, this is GPIR_DEP_INPUT */ + } + + return 0; +} + +static int gpir_max_dist_alu(gpir_dep *dep) +{ + switch (dep->pred->op) { + case gpir_op_load_uniform: + case gpir_op_load_temp: + return 0; + case gpir_op_load_attribute: + return 1; + case gpir_op_load_reg: + if (dep->pred->sched.pos < GPIR_INSTR_SLOT_REG0_LOAD0 || + dep->pred->sched.pos > GPIR_INSTR_SLOT_REG0_LOAD3) + return 0; + else + return 1; + case gpir_op_exp2_impl: + case gpir_op_log2_impl: + case gpir_op_rcp_impl: + case gpir_op_rsqrt_impl: + case gpir_op_store_temp_load_off0: + case gpir_op_store_temp_load_off1: + case gpir_op_store_temp_load_off2: + return 1; + case gpir_op_mov: + if 
(dep->pred->sched.pos == GPIR_INSTR_SLOT_COMPLEX) + return 1; + else + return 2; + default: + return 2; + } +} + +static int gpir_get_max_dist(gpir_dep *dep) +{ + switch (dep->type) { + case GPIR_DEP_INPUT: + switch (dep->succ->op) { + case gpir_op_store_temp: + case gpir_op_store_reg: + case gpir_op_store_varying: + return 0; + + default: + return gpir_max_dist_alu(dep); + } + + case GPIR_DEP_OFFSET: + assert(dep->succ->op == gpir_op_store_temp); + return gpir_max_dist_alu(dep); + + default: + return INT_MAX >> 2; /* Don't want to overflow... */ + } +} + +static void schedule_update_distance(gpir_node *node) +{ + if (gpir_node_is_leaf(node)) { + node->sched.dist = 0; + return; + } + + gpir_node_foreach_pred(node, dep) { + gpir_node *pred = dep->pred; + + if (pred->sched.dist < 0) + schedule_update_distance(pred); + + int dist = pred->sched.dist + 1; + if (node->sched.dist < dist) + node->sched.dist = dist; + } +} + +static void schedule_insert_ready_list(struct list_head *ready_list, + gpir_node *insert_node) +{ + /* if this node is fully ready or partially ready + * fully ready: all successors have been scheduled + * partially ready: part of input successors have been scheduled + * + * either fully ready or partially ready node need be inserted to + * the ready list, but we only schedule a move node for partially + * ready node. + */ + bool ready = true, insert = false; + gpir_node_foreach_succ(insert_node, dep) { + gpir_node *succ = dep->succ; + if (succ->sched.instr >= 0) { + if (dep->type == GPIR_DEP_INPUT) + insert = true; + } + else + ready = false; + } + + insert_node->sched.ready = ready; + /* for root node */ + insert |= ready; + + if (!insert || insert_node->sched.inserted) + return; + + struct list_head *insert_pos = ready_list; + list_for_each_entry(gpir_node, node, ready_list, list) { + if (insert_node->sched.dist > node->sched.dist) { + insert_pos = &node->list; + break; + } + } + + list_addtail(&insert_node->list, insert_pos); + insert_node->sched.inserted = true; +} + +static int gpir_get_max_start(gpir_node *node) +{ + int max_start = 0; + + /* find the max start instr constrainted by all successors */ + gpir_node_foreach_succ(node, dep) { + gpir_node *succ = dep->succ; + if (succ->sched.instr < 0) + continue; + + int start = succ->sched.instr + gpir_get_min_dist(dep); + if (start > max_start) + max_start = start; + } + + return max_start; +} + +static int gpir_get_min_end(gpir_node *node) +{ + int min_end = INT_MAX; + + /* find the min end instr constrainted by all successors */ + gpir_node_foreach_succ(node, dep) { + gpir_node *succ = dep->succ; + if (succ->sched.instr < 0) + continue; + + int end = succ->sched.instr + gpir_get_max_dist(dep); + if (end < min_end) + min_end = end; + } + + return min_end; +} + +static gpir_node *gpir_sched_instr_has_load(gpir_instr *instr, gpir_node *node) +{ + gpir_load_node *load = gpir_node_to_load(node); + + for (int i = GPIR_INSTR_SLOT_REG0_LOAD0; i <= GPIR_INSTR_SLOT_MEM_LOAD3; i++) { + if (!instr->slots[i]) + continue; + + gpir_load_node *iload = gpir_node_to_load(instr->slots[i]); + if (load->node.op == iload->node.op && + load->index == iload->index && + load->component == iload->component) + return &iload->node; + } + return NULL; +} + +static bool schedule_try_place_node(gpir_instr *instr, gpir_node *node) +{ + if (node->type == gpir_node_type_load) { + gpir_node *load = gpir_sched_instr_has_load(instr, node); + if (load) { + gpir_debug("same load %d in instr %d for node %d\n", + load->index, instr->index, node->index); + + /* 
not really merge two node, just fake scheduled same place */ + node->sched.instr = load->sched.instr; + node->sched.pos = load->sched.pos; + return true; + } + } + + node->sched.instr = instr->index; + + int *slots = gpir_op_infos[node->op].slots; + for (int i = 0; slots[i] != GPIR_INSTR_SLOT_END; i++) { + node->sched.pos = slots[i]; + if (node->sched.instr >= gpir_get_max_start(node) && + node->sched.instr <= gpir_get_min_end(node) && + gpir_instr_try_insert_node(instr, node)) + return true; + } + + node->sched.instr = -1; + node->sched.pos = -1; + return false; +} + +static gpir_node *schedule_create_move_node(gpir_node *node) +{ + gpir_alu_node *move = gpir_node_create(node->block, gpir_op_mov); + if (unlikely(!move)) + return NULL; + + move->children[0] = node; + move->num_child = 1; + + move->node.sched.instr = -1; + move->node.sched.pos = -1; + move->node.sched.dist = node->sched.dist; + + gpir_debug("create move %d for %d\n", move->node.index, node->index); + return &move->node; +} + +static gpir_node *gpir_sched_node(gpir_instr *instr, gpir_node *node) +{ + if (node->op == gpir_op_mov) { + gpir_node *child = gpir_node_to_alu(node)->children[0]; + gpir_node_foreach_succ_safe(node, dep) { + gpir_node *succ = dep->succ; + if (succ->sched.instr < 0 || + instr->index < succ->sched.instr + gpir_get_min_dist(dep)) { + gpir_node_replace_pred(dep, child); + if (dep->type == GPIR_DEP_INPUT) + gpir_node_replace_child(succ, node, child); + } + } + MAYBE_UNUSED bool result = schedule_try_place_node(instr, node); + assert(result); + return node; + } + else { + gpir_node *move = schedule_create_move_node(node); + list_del(&node->list); + node->sched.ready = false; + node->sched.inserted = false; + gpir_node_replace_succ(move, node); + gpir_node_add_dep(move, node, GPIR_DEP_INPUT); + return move; + } +} + +static bool gpir_is_input_node(gpir_node *node) +{ + gpir_node_foreach_succ(node, dep) { + if (dep->type == GPIR_DEP_INPUT) + return true; + } + return false; +} + +static int gpir_get_min_scheduled_succ(gpir_node *node) +{ + int min = INT_MAX; + gpir_node_foreach_succ(node, dep) { + gpir_node *succ = dep->succ; + if (succ->sched.instr >= 0 && dep->type == GPIR_DEP_INPUT) { + if (min > succ->sched.instr) + min = succ->sched.instr; + } + } + return min; +} + +static gpir_node *gpir_sched_instr_pass(gpir_instr *instr, + struct list_head *ready_list) +{ + /* fully ready node reach its max dist with any of its successor */ + list_for_each_entry_safe(gpir_node, node, ready_list, list) { + if (node->sched.ready) { + int end = gpir_get_min_end(node); + assert(end >= instr->index); + if (instr->index < end) + continue; + + gpir_debug("fully ready max node %d\n", node->index); + + if (schedule_try_place_node(instr, node)) + return node; + + return gpir_sched_node(instr, node); + } + } + + /* partially ready node reach its max dist with any of its successor */ + list_for_each_entry_safe(gpir_node, node, ready_list, list) { + if (!node->sched.ready) { + int end = gpir_get_min_end(node); + assert(end >= instr->index); + if (instr->index < end) + continue; + + gpir_debug("partially ready max node %d\n", node->index); + + return gpir_sched_node(instr, node); + } + } + + /* schedule node used by previous instr when count > 5 */ + int count = 0; + list_for_each_entry(gpir_node, node, ready_list, list) { + if (gpir_is_input_node(node)) { + int min = gpir_get_min_scheduled_succ(node); + assert(min >= instr->index - 1); + if (min == instr->index - 1) + count += gpir_op_infos[node->op].may_consume_two_slots ? 
2 : 1; + } + } + + if (count > 5) { + /* schedule fully ready node first */ + list_for_each_entry(gpir_node, node, ready_list, list) { + if (gpir_is_input_node(node)) { + int min = gpir_get_min_scheduled_succ(node); + if (min == instr->index - 1 && node->sched.ready) { + gpir_debug(">5 ready node %d\n", node->index); + + if (schedule_try_place_node(instr, node)) + return node; + } + } + } + + /* no fully ready node be scheduled, schedule partially ready node */ + list_for_each_entry_safe(gpir_node, node, ready_list, list) { + if (gpir_is_input_node(node)) { + int min = gpir_get_min_scheduled_succ(node); + if (min == instr->index - 1 && !node->sched.ready) { + gpir_debug(">5 partially ready node %d\n", node->index); + + return gpir_sched_node(instr, node); + } + } + } + + /* finally schedule move for fully ready node */ + list_for_each_entry_safe(gpir_node, node, ready_list, list) { + if (gpir_is_input_node(node)) { + int min = gpir_get_min_scheduled_succ(node); + if (min == instr->index - 1 && node->sched.ready) { + gpir_debug(">5 fully ready move node %d\n", node->index); + + return gpir_sched_node(instr, node); + } + } + } + } + + /* schedule remain fully ready nodes */ + list_for_each_entry(gpir_node, node, ready_list, list) { + if (node->sched.ready) { + gpir_debug("remain fully ready node %d\n", node->index); + + if (schedule_try_place_node(instr, node)) + return node; + } + } + + return NULL; +} + +static void schedule_print_pre_one_instr(gpir_instr *instr, + struct list_head *ready_list) +{ + if (!(lima_debug & LIMA_DEBUG_GP)) + return; + + printf("instr %d for ready list:", instr->index); + list_for_each_entry(gpir_node, node, ready_list, list) { + printf(" %d/%c", node->index, node->sched.ready ? 'r' : 'p'); + } + printf("\n"); +} + +static void schedule_print_post_one_instr(gpir_instr *instr) +{ + if (!(lima_debug & LIMA_DEBUG_GP)) + return; + + printf("post schedule instr"); + for (int i = 0; i < GPIR_INSTR_SLOT_NUM; i++) { + if (instr->slots[i]) + printf(" %d/%d", i, instr->slots[i]->index); + } + printf("\n"); +} + + +static bool schedule_one_instr(gpir_block *block, struct list_head *ready_list) +{ + gpir_instr *instr = gpir_instr_create(block); + if (unlikely(!instr)) + return false; + + schedule_print_pre_one_instr(instr, ready_list); + + while (true) { + gpir_node *node = gpir_sched_instr_pass(instr, ready_list); + if (!node) + break; + + if (node->sched.instr < 0) + schedule_insert_ready_list(ready_list, node); + else { + list_del(&node->list); + list_add(&node->list, &block->node_list); + + gpir_node_foreach_pred(node, dep) { + gpir_node *pred = dep->pred; + schedule_insert_ready_list(ready_list, pred); + } + } + } + + schedule_print_post_one_instr(instr); + return true; +} + +static bool schedule_block(gpir_block *block) +{ + /* calculate distance */ + list_for_each_entry(gpir_node, node, &block->node_list, list) { + if (gpir_node_is_root(node)) + schedule_update_distance(node); + } + + struct list_head ready_list; + list_inithead(&ready_list); + + /* construct the ready list from root nodes */ + list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { + if (gpir_node_is_root(node)) + schedule_insert_ready_list(&ready_list, node); + } + + list_inithead(&block->node_list); + while (!list_empty(&ready_list)) { + if (!schedule_one_instr(block, &ready_list)) + return false; + } + + return true; +} + +static void schedule_build_vreg_dependency(gpir_block *block) +{ + gpir_node *regs[GPIR_VALUE_REG_NUM] = {0}; + list_for_each_entry(gpir_node, node, 
&block->node_list, list) { + /* store node has no value reg assigned */ + if (node->value_reg < 0) + continue; + + gpir_node *reg = regs[node->value_reg]; + if (reg) { + gpir_node_foreach_succ(reg, dep) { + /* write after read dep should only apply to real 'read' */ + if (dep->type != GPIR_DEP_INPUT) + continue; + + gpir_node *succ = dep->succ; + gpir_node_add_dep(node, succ, GPIR_DEP_VREG_WRITE_AFTER_READ); + } + } + regs[node->value_reg] = node; + } + + /* merge dummy_f/m to the node created from */ + list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { + if (node->op == gpir_op_dummy_m) { + gpir_alu_node *alu = gpir_node_to_alu(node); + gpir_node *origin = alu->children[0]; + gpir_node *dummy_f = alu->children[1]; + + gpir_node_foreach_succ(node, dep) { + gpir_node *succ = dep->succ; + /* origin and node may have same succ (by VREG/INPUT or + * VREG/VREG dep), so use gpir_node_add_dep() instead of + * gpir_node_replace_pred() */ + gpir_node_add_dep(succ, origin, dep->type); + gpir_node_replace_child(succ, node, origin); + } + gpir_node_delete(dummy_f); + gpir_node_delete(node); + } + } +} + +static void schedule_build_preg_dependency(gpir_compiler *comp) +{ + /* merge reg with the same index */ + gpir_reg *regs[GPIR_VALUE_REG_NUM] = {0}; + list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) { + if (!regs[reg->index]) + regs[reg->index] = reg; + else { + list_splicetail(®->defs_list, ®s[reg->index]->defs_list); + list_splicetail(®->uses_list, ®s[reg->index]->uses_list); + } + } + + /* calculate physical reg read/write dependency for load/store nodes */ + for (int i = 0; i < GPIR_VALUE_REG_NUM; i++) { + gpir_reg *reg = regs[i]; + if (!reg) + continue; + + /* sort reg write */ + struct list_head tmp_list; + list_replace(®->defs_list, &tmp_list); + list_inithead(®->defs_list); + list_for_each_entry_safe(gpir_store_node, store, &tmp_list, reg_link) { + struct list_head *insert_pos = ®->defs_list; + list_for_each_entry(gpir_store_node, st, ®->defs_list, reg_link) { + if (st->node.sched.index > store->node.sched.index) { + insert_pos = &st->reg_link; + break; + } + } + list_del(&store->reg_link); + list_addtail(&store->reg_link, insert_pos); + } + + /* sort reg read */ + list_replace(®->uses_list, &tmp_list); + list_inithead(®->uses_list); + list_for_each_entry_safe(gpir_load_node, load, &tmp_list, reg_link) { + struct list_head *insert_pos = ®->uses_list; + list_for_each_entry(gpir_load_node, ld, ®->uses_list, reg_link) { + if (ld->node.sched.index > load->node.sched.index) { + insert_pos = &ld->reg_link; + break; + } + } + list_del(&load->reg_link); + list_addtail(&load->reg_link, insert_pos); + } + + /* insert dependency */ + gpir_store_node *store = + list_first_entry(®->defs_list, gpir_store_node, reg_link); + gpir_store_node *next = store->reg_link.next != ®->defs_list ? + list_first_entry(&store->reg_link, gpir_store_node, reg_link) : NULL; + + list_for_each_entry(gpir_load_node, load, ®->uses_list, reg_link) { + /* loop until load is between store and next */ + while (next && next->node.sched.index < load->node.sched.index) { + store = next; + next = store->reg_link.next != ®->defs_list ? 
+ list_first_entry(&store->reg_link, gpir_store_node, reg_link) : NULL; + } + + gpir_node_add_dep(&load->node, &store->node, GPIR_DEP_READ_AFTER_WRITE); + if (next) + gpir_node_add_dep(&next->node, &load->node, GPIR_DEP_WRITE_AFTER_READ); + } + } +} + +static void print_statistic(gpir_compiler *comp, int save_index) +{ + int num_nodes[gpir_op_num] = {0}; + int num_created_nodes[gpir_op_num] = {0}; + + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry(gpir_node, node, &block->node_list, list) { + num_nodes[node->op]++; + if (node->index >= save_index) + num_created_nodes[node->op]++; + } + } + + printf("====== gpir scheduler statistic ======\n"); + printf("---- how many nodes are scheduled ----\n"); + int n = 0, l = 0; + for (int i = 0; i < gpir_op_num; i++) { + if (num_nodes[i]) { + printf("%10s:%-6d", gpir_op_infos[i].name, num_nodes[i]); + n += num_nodes[i]; + if (!(++l % 4)) + printf("\n"); + } + } + if (l % 4) + printf("\n"); + printf("\ntotal: %d\n", n); + + printf("---- how many nodes are created ----\n"); + n = l = 0; + for (int i = 0; i < gpir_op_num; i++) { + if (num_created_nodes[i]) { + printf("%10s:%-6d", gpir_op_infos[i].name, num_created_nodes[i]); + n += num_created_nodes[i]; + if (!(++l % 4)) + printf("\n"); + } + } + if (l % 4) + printf("\n"); + printf("\ntotal: %d\n", n); + printf("------------------------------------\n"); +} + +bool gpir_schedule_prog(gpir_compiler *comp) +{ + int save_index = comp->cur_index; + + /* init schedule info */ + int index = 0; + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + block->sched.instr_index = 0; + list_for_each_entry(gpir_node, node, &block->node_list, list) { + node->sched.instr = -1; + node->sched.pos = -1; + node->sched.index = index++; + node->sched.dist = -1; + node->sched.ready = false; + node->sched.inserted = false; + } + } + + /* build fake/virtual dependency */ + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + schedule_build_vreg_dependency(block); + } + schedule_build_preg_dependency(comp); + + //gpir_debug("after scheduler build reg dependency\n"); + //gpir_node_print_prog_dep(comp); + + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + if (!schedule_block(block)) { + gpir_error("fail schedule block\n"); + return false; + } + } + + if (lima_debug & LIMA_DEBUG_GP) { + print_statistic(comp, save_index); + gpir_instr_print_prog(comp); + } + + return true; +} diff --git a/src/gallium/drivers/lima/ir/gp/value_regalloc.c b/src/gallium/drivers/lima/ir/gp/value_regalloc.c new file mode 100644 index 00000000000..f633b949932 --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/value_regalloc.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
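The (store, next) walk above pairs each load with the nearest store before it and, if present, the store after it. A small sketch of the same interleaving on plain position arrays, assuming at least one def exists for the register, as the pass does; pair_deps() and the sample positions are hypothetical:

#include <stdio.h>

static void pair_deps(const int *defs, int n_defs, const int *uses, int n_uses)
{
   int d = 0;   /* current def; defs[] and uses[] are assumed sorted */

   for (int u = 0; u < n_uses; u++) {
      /* advance so the use sits between defs[d] and defs[d + 1] */
      while (d + 1 < n_defs && defs[d + 1] < uses[u])
         d++;

      printf("use@%d read-after-write on def@%d\n", uses[u], defs[d]);
      if (d + 1 < n_defs)
         printf("def@%d write-after-read on use@%d\n", defs[d + 1], uses[u]);
   }
}

int main(void)
{
   /* e.g. defs at positions 2 and 9, uses at 5, 7 and 12 */
   const int defs[] = {2, 9}, uses[] = {5, 7, 12};
   pair_deps(defs, 2, uses, 3);
   return 0;
}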
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "gpir.h" + +/* Linear scan register alloc for value reg alloc of each node */ + +static int regalloc_spill_active_node(gpir_node *active[]) +{ + gpir_node *spill = NULL; + for (int i = 0; i < GPIR_VALUE_REG_NUM; i++) { + if (gpir_op_infos[active[i]->op].spillless) + continue; + + /* spill farest node */ + if (!spill || + spill->vreg.last->vreg.index < active[i]->vreg.last->vreg.index) { + spill = active[i]; + } + } + + assert(spill); + gpir_debug("value regalloc spill node %d for value reg %d\n", + spill->index, spill->value_reg); + + /* create store node for spilled node */ + gpir_store_node *store = gpir_node_create(spill->block, gpir_op_store_reg); + store->child = spill; + /* no need to calculate other vreg values because store & spill won't + * be used in the following schedule again */ + store->node.value_reg = spill->value_reg; + list_addtail(&store->node.list, &spill->list); + + gpir_reg *reg = gpir_create_reg(spill->block->comp); + store->reg = reg; + list_addtail(&store->reg_link, ®->defs_list); + + gpir_node_foreach_succ_safe(spill, dep) { + gpir_node *succ = dep->succ; + gpir_load_node *load = gpir_node_create(succ->block, gpir_op_load_reg); + gpir_node_replace_pred(dep, &load->node); + gpir_node_replace_child(succ, spill, &load->node); + list_addtail(&load->node.list, &succ->list); + + /* only valid for succ already scheduled, succ not scheduled will + * re-write this value */ + load->node.value_reg = spill->value_reg; + load->node.vreg.index = + (list_first_entry(&load->node.list, gpir_node, list)->vreg.index + + list_last_entry(&load->node.list, gpir_node, list)->vreg.index) / 2.0f; + load->node.vreg.last = succ; + + load->reg = reg; + list_addtail(&load->reg_link, ®->uses_list); + } + + gpir_node_add_dep(&store->node, spill, GPIR_DEP_INPUT); + return spill->value_reg; +} + +static void regalloc_block(gpir_block *block) +{ + /* build each node sequence index in the block node list */ + int index = 0; + list_for_each_entry(gpir_node, node, &block->node_list, list) { + node->vreg.index = index++; + } + + /* find the last successor of each node by the sequence index */ + list_for_each_entry(gpir_node, node, &block->node_list, list) { + node->vreg.last = NULL; + gpir_node_foreach_succ(node, dep) { + gpir_node *succ = dep->succ; + if (!node->vreg.last || node->vreg.last->vreg.index < succ->vreg.index) + node->vreg.last = succ; + } + } + + /* do linear scan regalloc */ + int reg_search_start = 0; + gpir_node *active[GPIR_VALUE_REG_NUM] = {0}; + list_for_each_entry(gpir_node, node, &block->node_list, list) { + /* if some reg is expired */ + gpir_node_foreach_pred(node, dep) { + gpir_node *pred = dep->pred; + if (pred->vreg.last == node) + active[pred->value_reg] = NULL; + } + + /* no need to alloc value reg for root node */ + if (gpir_node_is_root(node)) { + node->value_reg = -1; + continue; + } + + /* find a free reg for this node */ + int i; + for (i = 0; i < GPIR_VALUE_REG_NUM; i++) { + /* round robin reg select to reduce false dep when schedule */ + int reg = 
(reg_search_start + i) % GPIR_VALUE_REG_NUM; + if (!active[reg]) { + active[reg] = node; + node->value_reg = reg; + reg_search_start++; + break; + } + } + + /* need spill */ + if (i == GPIR_VALUE_REG_NUM) { + int spilled_reg = regalloc_spill_active_node(active); + active[spilled_reg] = node; + node->value_reg = spilled_reg; + gpir_debug("value regalloc node %d reuse reg %d\n", + node->index, spilled_reg); + } + } +} + +static void regalloc_print_result(gpir_compiler *comp) +{ + if (!(lima_debug & LIMA_DEBUG_GP)) + return; + + int index = 0; + printf("======== value regalloc ========\n"); + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + list_for_each_entry(gpir_node, node, &block->node_list, list) { + printf("%03d: %d/%d %s ", index++, node->index, node->value_reg, + gpir_op_infos[node->op].name); + gpir_node_foreach_pred(node, dep) { + gpir_node *pred = dep->pred; + printf(" %d/%d", pred->index, pred->value_reg); + } + printf("\n"); + } + printf("----------------------------\n"); + } +} + +bool gpir_value_regalloc_prog(gpir_compiler *comp) +{ + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + regalloc_block(block); + } + + regalloc_print_result(comp); + return true; +} diff --git a/src/gallium/drivers/lima/ir/lima_ir.h b/src/gallium/drivers/lima/ir/lima_ir.h new file mode 100644 index 00000000000..9ef4b68235e --- /dev/null +++ b/src/gallium/drivers/lima/ir/lima_ir.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ + +#ifndef LIMA_IR_H +#define LIMA_IR_H + +#include +#include + +#include "nir.h" + +#define gpir_debug(...) \ + do { \ + if (lima_debug & LIMA_DEBUG_GP) \ + printf("gpir: " __VA_ARGS__); \ + } while (0) + +#define gpir_error(...) \ + fprintf(stderr, "gpir: " __VA_ARGS__) + +#define ppir_debug(...) \ + do { \ + if (lima_debug & LIMA_DEBUG_PP) \ + printf("ppir: " __VA_ARGS__); \ + } while (0) + +#define ppir_error(...) 
\ + fprintf(stderr, "ppir: " __VA_ARGS__) + + +struct ra_regs; +struct lima_vs_shader_state; +struct lima_fs_shader_state; + +/* gpir interface */ +bool gpir_compile_nir(struct lima_vs_shader_state *prog, struct nir_shader *nir); + + +/* ppir interface */ +bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir, + struct ra_regs *ra); +struct ra_regs *ppir_regalloc_init(void *mem_ctx); + +void lima_nir_lower_uniform_to_scalar(nir_shader *shader); + +#endif diff --git a/src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c b/src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c new file mode 100644 index 00000000000..c1bbbc4b71a --- /dev/null +++ b/src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2019 Qiang Yu + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
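A hedged sketch of how these entry points could be wired together by a caller, using only the prototypes declared above; the function, its arguments, and the ordering of the NIR lowering relative to the backends are illustrative assumptions, not the driver's actual call sites:

#include <stdbool.h>

#include "lima_ir.h"   /* assumed include path for the prototypes above */

static bool example_compile(struct lima_vs_shader_state *vs,
                            struct lima_fs_shader_state *fs,
                            struct nir_shader *vs_nir,
                            struct nir_shader *fs_nir,
                            void *mem_ctx)
{
   /* split vector uniform loads into per-component loads before running
    * the gpir (vertex) backend -- illustrative ordering */
   lima_nir_lower_uniform_to_scalar(vs_nir);
   if (!gpir_compile_nir(vs, vs_nir))
      return false;

   /* the ppir (fragment) backend takes a register set created by
    * ppir_regalloc_init(); here it is created ad hoc for the example */
   struct ra_regs *ra = ppir_regalloc_init(mem_ctx);
   if (!ra)
      return false;

   return ppir_compile_nir(fs, fs_nir, ra);
}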
+ * + */ + +#include "nir.h" +#include "nir_builder.h" +#include "lima_ir.h" + +static void +lower_load_uniform_to_scalar(nir_builder *b, nir_intrinsic_instr *intr) +{ + b->cursor = nir_before_instr(&intr->instr); + + nir_ssa_def *loads[4]; + for (unsigned i = 0; i < intr->num_components; i++) { + nir_intrinsic_instr *chan_intr = + nir_intrinsic_instr_create(b->shader, intr->intrinsic); + nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest, + 1, intr->dest.ssa.bit_size, NULL); + chan_intr->num_components = 1; + + nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr) * 4 + i); + nir_intrinsic_set_range(chan_intr, nir_intrinsic_range(intr) * 4); + + chan_intr->src[0] = + nir_src_for_ssa(nir_fmul_imm(b, intr->src[0].ssa, 4)); + + nir_builder_instr_insert(b, &chan_intr->instr); + + loads[i] = &chan_intr->dest.ssa; + } + + nir_ssa_def_rewrite_uses(&intr->dest.ssa, + nir_src_for_ssa(nir_vec(b, loads, + intr->num_components))); + nir_instr_remove(&intr->instr); +} + +void +lima_nir_lower_uniform_to_scalar(nir_shader *shader) +{ + nir_foreach_function(function, shader) { + if (function->impl) { + nir_builder b; + nir_builder_init(&b, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + if (intr->intrinsic != nir_intrinsic_load_uniform || + intr->num_components == 1) + continue; + + lower_load_uniform_to_scalar(&b, intr); + } + } + } + } +} diff --git a/src/gallium/drivers/lima/ir/pp/codegen.c b/src/gallium/drivers/lima/ir/pp/codegen.c new file mode 100644 index 00000000000..1cce28595e6 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/codegen.c @@ -0,0 +1,669 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
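The index arithmetic of the lowering pass above, shown in isolation: uniform bases and ranges are counted in vec4 units before lowering and in components afterwards, and the indirect source operand is rescaled by the same factor of 4. scalarize_uniform_ref() and its struct are hypothetical helpers for illustration only:

/* e.g. a vec4 uniform at vec4 base 1: after lowering, channel 2 becomes a
 * scalar load at component base 1 * 4 + 2 = 6 */
struct scalar_uniform_ref {
   unsigned base;    /* component units: vec4 base * 4 + channel */
   unsigned range;   /* component units: vec4 range * 4 */
};

static struct scalar_uniform_ref
scalarize_uniform_ref(unsigned vec4_base, unsigned vec4_range, unsigned chan)
{
   struct scalar_uniform_ref r = {
      .base = vec4_base * 4 + chan,
      .range = vec4_range * 4,
   };
   return r;
}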
+ * + */ + +#include "util/ralloc.h" +#include "util/u_half.h" +#include "util/bitscan.h" + +#include "ppir.h" +#include "codegen.h" +#include "lima_context.h" + +static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift) +{ + unsigned ret = 0; + for (int i = 0; i < 4; i++) + ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2); + return ret; +} + +static int get_scl_reg_index(ppir_src *src, int component) +{ + int ret = ppir_target_get_src_reg_index(src); + ret += src->swizzle[component]; + return ret; +} + +static void ppir_codegen_encode_varying(ppir_node *node, void *code) +{ + ppir_codegen_field_varying *f = code; + ppir_load_node *load = ppir_node_to_load(node); + ppir_dest *dest = &load->dest; + int index = ppir_target_get_dest_reg_index(dest); + int num_components = load->num_components; + + if (num_components) { + assert(node->op == ppir_op_load_varying || node->op == ppir_op_load_coords); + + f->imm.dest = index >> 2; + f->imm.mask = dest->write_mask << (index & 0x3); + + int alignment = num_components == 3 ? 3 : num_components - 1; + f->imm.alignment = alignment; + f->imm.offset_vector = 0xf; + + if (alignment == 3) + f->imm.index = load->index >> 2; + else + f->imm.index = load->index >> alignment; + } + else { + assert(node->op == ppir_op_load_coords); + + f->reg.dest = index >> 2; + f->reg.mask = dest->write_mask << (index & 0x3); + + f->reg.source_type = 1; + + ppir_src *src = &load->src; + index = ppir_target_get_src_reg_index(src); + f->reg.source = index >> 2; + f->reg.negate = src->negate; + f->reg.absolute = src->absolute; + f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0); + } +} + +static void ppir_codegen_encode_texld(ppir_node *node, void *code) +{ + ppir_codegen_field_sampler *f = code; + ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node); + + f->index = ldtex->sampler; + f->lod_bias_en = 0; + f->type = ppir_codegen_sampler_type_2d; + f->offset_en = 0; + f->unknown_2 = 0x39001; +} + +static void ppir_codegen_encode_uniform(ppir_node *node, void *code) +{ + ppir_codegen_field_uniform *f = code; + ppir_load_node *load = ppir_node_to_load(node); + + switch (node->op) { + case ppir_op_load_uniform: + f->source = ppir_codegen_uniform_src_uniform; + break; + case ppir_op_load_temp: + f->source = ppir_codegen_uniform_src_temporary; + break; + default: + assert(0); + } + + int num_components = load->num_components; + int alignment = num_components == 4 ? 2 : num_components - 1; + + f->alignment = alignment; + + /* TODO: uniform can be also combined like varying */ + f->index = load->index << (2 - alignment); +} + +static unsigned shift_to_op(int shift) +{ + assert(shift >= -3 && shift <= 3); + return shift < 0 ? 
shift + 8 : shift; +} + +static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code) +{ + ppir_codegen_field_vec4_mul *f = code; + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_dest *dest = &alu->dest; + int dest_shift = 0; + if (dest->type != ppir_target_pipeline) { + int index = ppir_target_get_dest_reg_index(dest); + dest_shift = index & 0x3; + f->dest = index >> 2; + f->mask = dest->write_mask << dest_shift; + } + f->dest_modifier = dest->modifier; + + switch (node->op) { + case ppir_op_mul: + f->op = shift_to_op(alu->shift); + break; + case ppir_op_mov: + f->op = ppir_codegen_vec4_mul_op_mov; + break; + case ppir_op_max: + f->op = ppir_codegen_vec4_mul_op_max; + break; + case ppir_op_min: + f->op = ppir_codegen_vec4_mul_op_min; + break; + case ppir_op_and: + f->op = ppir_codegen_vec4_mul_op_and; + break; + case ppir_op_or: + f->op = ppir_codegen_vec4_mul_op_or; + break; + case ppir_op_xor: + f->op = ppir_codegen_vec4_mul_op_xor; + break; + case ppir_op_gt: + f->op = ppir_codegen_vec4_mul_op_gt; + break; + case ppir_op_ge: + f->op = ppir_codegen_vec4_mul_op_ge; + break; + case ppir_op_eq: + f->op = ppir_codegen_vec4_mul_op_eq; + break; + case ppir_op_ne: + f->op = ppir_codegen_vec4_mul_op_ne; + break; + case ppir_op_not: + f->op = ppir_codegen_vec4_mul_op_not; + break; + default: + break; + } + + ppir_src *src = alu->src; + int index = ppir_target_get_src_reg_index(src); + f->arg0_source = index >> 2; + f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); + f->arg0_absolute = src->absolute; + f->arg0_negate = src->negate; + + if (alu->num_src == 2) { + src = alu->src + 1; + index = ppir_target_get_src_reg_index(src); + f->arg1_source = index >> 2; + f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); + f->arg1_absolute = src->absolute; + f->arg1_negate = src->negate; + } +} + +static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code) +{ + ppir_codegen_field_float_mul *f = code; + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_dest *dest = &alu->dest; + int dest_component = ffs(dest->write_mask) - 1; + assert(dest_component >= 0); + + if (dest->type != ppir_target_pipeline) { + f->dest = ppir_target_get_dest_reg_index(dest) + dest_component; + f->output_en = true; + } + f->dest_modifier = dest->modifier; + + switch (node->op) { + case ppir_op_mul: + f->op = shift_to_op(alu->shift); + break; + case ppir_op_mov: + f->op = ppir_codegen_float_mul_op_mov; + break; + case ppir_op_max: + f->op = ppir_codegen_float_mul_op_max; + break; + case ppir_op_min: + f->op = ppir_codegen_float_mul_op_min; + break; + case ppir_op_and: + f->op = ppir_codegen_float_mul_op_and; + break; + case ppir_op_or: + f->op = ppir_codegen_float_mul_op_or; + break; + case ppir_op_xor: + f->op = ppir_codegen_float_mul_op_xor; + break; + case ppir_op_gt: + f->op = ppir_codegen_float_mul_op_gt; + break; + case ppir_op_ge: + f->op = ppir_codegen_float_mul_op_ge; + break; + case ppir_op_eq: + f->op = ppir_codegen_float_mul_op_eq; + break; + case ppir_op_ne: + f->op = ppir_codegen_float_mul_op_ne; + break; + case ppir_op_not: + f->op = ppir_codegen_float_mul_op_not; + break; + default: + break; + } + + ppir_src *src = alu->src; + f->arg0_source = get_scl_reg_index(src, dest_component); + f->arg0_absolute = src->absolute; + f->arg0_negate = src->negate; + + if (alu->num_src == 2) { + src = alu->src + 1; + f->arg1_source = get_scl_reg_index(src, dest_component); + f->arg1_absolute = src->absolute; + f->arg1_negate = src->negate; + } +} + +static void 
ppir_codegen_encode_vec_add(ppir_node *node, void *code) +{ + ppir_codegen_field_vec4_acc *f = code; + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_dest *dest = &alu->dest; + int index = ppir_target_get_dest_reg_index(dest); + int dest_shift = index & 0x3; + f->dest = index >> 2; + f->mask = dest->write_mask << dest_shift; + f->dest_modifier = dest->modifier; + + switch (node->op) { + case ppir_op_add: + f->op = ppir_codegen_vec4_acc_op_add; + break; + case ppir_op_mov: + f->op = ppir_codegen_vec4_acc_op_mov; + break; + case ppir_op_sum3: + f->op = ppir_codegen_vec4_acc_op_sum3; + dest_shift = 0; + break; + case ppir_op_sum4: + f->op = ppir_codegen_vec4_acc_op_sum4; + dest_shift = 0; + break; + case ppir_op_floor: + f->op = ppir_codegen_vec4_acc_op_floor; + break; + case ppir_op_fract: + f->op = ppir_codegen_vec4_acc_op_fract; + break; + case ppir_op_gt: + f->op = ppir_codegen_vec4_acc_op_gt; + break; + case ppir_op_ge: + f->op = ppir_codegen_vec4_acc_op_ge; + break; + case ppir_op_eq: + f->op = ppir_codegen_vec4_acc_op_eq; + break; + case ppir_op_ne: + f->op = ppir_codegen_vec4_acc_op_ne; + break; + case ppir_op_select: + f->op = ppir_codegen_vec4_acc_op_sel; + break; + default: + break; + } + + ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src; + index = ppir_target_get_src_reg_index(src); + + if (src->type == ppir_target_pipeline && + src->pipeline == ppir_pipeline_reg_vmul) + f->mul_in = true; + else + f->arg0_source = index >> 2; + + f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); + f->arg0_absolute = src->absolute; + f->arg0_negate = src->negate; + + if (++src < alu->src + alu->num_src) { + index = ppir_target_get_src_reg_index(src); + f->arg1_source = index >> 2; + f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); + f->arg1_absolute = src->absolute; + f->arg1_negate = src->negate; + } +} + +static void ppir_codegen_encode_scl_add(ppir_node *node, void *code) +{ + ppir_codegen_field_float_acc *f = code; + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_dest *dest = &alu->dest; + int dest_component = ffs(dest->write_mask) - 1; + assert(dest_component >= 0); + + f->dest = ppir_target_get_dest_reg_index(dest) + dest_component; + f->output_en = true; + f->dest_modifier = dest->modifier; + + switch (node->op) { + case ppir_op_add: + f->op = shift_to_op(alu->shift); + break; + case ppir_op_mov: + f->op = ppir_codegen_float_acc_op_mov; + break; + case ppir_op_max: + f->op = ppir_codegen_float_acc_op_max; + break; + case ppir_op_min: + f->op = ppir_codegen_float_acc_op_min; + break; + case ppir_op_floor: + f->op = ppir_codegen_float_acc_op_floor; + break; + case ppir_op_fract: + f->op = ppir_codegen_float_acc_op_fract; + break; + case ppir_op_gt: + f->op = ppir_codegen_float_acc_op_gt; + break; + case ppir_op_ge: + f->op = ppir_codegen_float_acc_op_ge; + break; + case ppir_op_eq: + f->op = ppir_codegen_float_acc_op_eq; + break; + case ppir_op_ne: + f->op = ppir_codegen_float_acc_op_ne; + break; + case ppir_op_select: + f->op = ppir_codegen_float_acc_op_sel; + break; + default: + break; + } + + ppir_src *src = node->op == ppir_op_select ? 
alu->src + 1: alu->src; + if (src->type == ppir_target_pipeline && + src->pipeline == ppir_pipeline_reg_fmul) + f->mul_in = true; + else + f->arg0_source = get_scl_reg_index(src, dest_component); + f->arg0_absolute = src->absolute; + f->arg0_negate = src->negate; + + if (++src < alu->src + alu->num_src) { + f->arg1_source = get_scl_reg_index(src, dest_component); + f->arg1_absolute = src->absolute; + f->arg1_negate = src->negate; + } +} + +static void ppir_codegen_encode_combine(ppir_node *node, void *code) +{ + ppir_codegen_field_combine *f = code; + ppir_alu_node *alu = ppir_node_to_alu(node); + + switch (node->op) { + case ppir_op_rsqrt: + case ppir_op_log2: + case ppir_op_exp2: + case ppir_op_rcp: + case ppir_op_sqrt: + case ppir_op_sin: + case ppir_op_cos: + { + f->scalar.dest_vec = false; + f->scalar.arg1_en = false; + + ppir_dest *dest = &alu->dest; + int dest_component = ffs(dest->write_mask) - 1; + assert(dest_component >= 0); + f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component; + f->scalar.dest_modifier = dest->modifier; + + ppir_src *src = alu->src; + f->scalar.arg0_src = get_scl_reg_index(src, dest_component); + f->scalar.arg0_absolute = src->absolute; + f->scalar.arg0_negate = src->negate; + + switch (node->op) { + case ppir_op_rsqrt: + f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt; + break; + case ppir_op_log2: + f->scalar.op = ppir_codegen_combine_scalar_op_log2; + break; + case ppir_op_exp2: + f->scalar.op = ppir_codegen_combine_scalar_op_exp2; + break; + case ppir_op_rcp: + f->scalar.op = ppir_codegen_combine_scalar_op_rcp; + break; + case ppir_op_sqrt: + f->scalar.op = ppir_codegen_combine_scalar_op_sqrt; + break; + case ppir_op_sin: + f->scalar.op = ppir_codegen_combine_scalar_op_sin; + break; + case ppir_op_cos: + f->scalar.op = ppir_codegen_combine_scalar_op_cos; + break; + default: + break; + } + } + default: + break; + } +} + +static void ppir_codegen_encode_store_temp(ppir_node *node, void *code) +{ + assert(node->op == ppir_op_store_temp); + + ppir_codegen_field_temp_write *f = code; + ppir_store_node *snode = ppir_node_to_store(node); + int num_components = snode->num_components; + + f->temp_write.dest = 0x03; // 11 - temporary + f->temp_write.source = snode->src.reg->index; + + int alignment = num_components == 4 ? 
2 : num_components - 1; + f->temp_write.alignment = alignment; + f->temp_write.index = snode->index << (2 - alignment); + + f->temp_write.offset_reg = snode->index >> 2; +} + +static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code) +{ + for (int i = 0; i < constant->num; i++) + code[i] = util_float_to_half(constant->value[i].f); +} + +typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *); + +static const ppir_codegen_instr_slot_encode_func +ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = { + [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying, + [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld, + [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform, + [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul, + [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul, + [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add, + [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add, + [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine, + [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp, +}; + +static const int ppir_codegen_field_size[] = { + 34, 62, 41, 43, 30, 44, 31, 30, 41, 73 +}; + +static inline int align_to_word(int size) +{ + return ((size + 0x1f) >> 5); +} + +static int get_instr_encode_size(ppir_instr *instr) +{ + int size = 0; + + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { + if (instr->slots[i]) + size += ppir_codegen_field_size[i]; + } + + for (int i = 0; i < 2; i++) { + if (instr->constant[i].num) + size += 64; + } + + return align_to_word(size) + 1; +} + +static void bitcopy(void *dst, int dst_offset, void *src, int src_size) +{ + int off1 = dst_offset & 0x1f; + uint32_t *cpy_dst = dst, *cpy_src = src; + + cpy_dst += (dst_offset >> 5); + + if (off1) { + int off2 = 32 - off1; + int cpy_size = 0; + while (1) { + *cpy_dst |= *cpy_src << off1; + cpy_dst++; + + cpy_size += off2; + if (cpy_size >= src_size) + break; + + *cpy_dst |= *cpy_src >> off2; + cpy_src++; + + cpy_size += off1; + if (cpy_size >= src_size) + break; + } + } + else + memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4); +} + +static int encode_instr(ppir_instr *instr, void *code, void *last_code) +{ + int size = 0; + ppir_codegen_ctrl *ctrl = code; + + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { + if (instr->slots[i]) { + /* max field size (73), align to dword */ + uint8_t output[12] = {0}; + + ppir_codegen_encode_slot[i](instr->slots[i], output); + bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]); + + size += ppir_codegen_field_size[i]; + ctrl->fields |= 1 << i; + } + } + + if (instr->slots[PPIR_INSTR_SLOT_TEXLD]) + ctrl->sync = true; + + for (int i = 0; i < 2; i++) { + if (instr->constant[i].num) { + uint16_t output[4] = {0}; + + ppir_codegen_encode_const(instr->constant + i, output); + bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16); + + size += 64; + ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i); + } + } + + size = align_to_word(size) + 1; + + ctrl->count = size; + if (instr->is_end) + ctrl->stop = true; + + if (last_code) { + ppir_codegen_ctrl *last_ctrl = last_code; + last_ctrl->next_count = size; + last_ctrl->prefetch = true; + } + + return size; +} + +static void ppir_codegen_print_prog(ppir_compiler *comp) +{ + uint32_t *prog = comp->prog->shader; + unsigned offset = 0; + + printf("========ppir codegen========\n"); + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + printf("%03d: ", 
instr->index); + int n = prog[0] & 0x1f; + for (int i = 0; i < n; i++) { + if (i && i % 6 == 0) + printf("\n "); + printf("%08x ", prog[i]); + } + printf("\n"); + ppir_disassemble_instr(prog, offset); + prog += n; + offset += n; + } + } + printf("-----------------------\n"); +} + +bool ppir_codegen_prog(ppir_compiler *comp) +{ + int size = 0; + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + size += get_instr_encode_size(instr); + } + } + + uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t)); + if (!prog) + return false; + + uint32_t *code = prog, *last_code = NULL; + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + int offset = encode_instr(instr, code, last_code); + last_code = code; + code += offset; + } + } + + comp->prog->shader = prog; + comp->prog->shader_size = size * sizeof(uint32_t); + + if (lima_debug & LIMA_DEBUG_PP) + ppir_codegen_print_prog(comp); + + return true; +} diff --git a/src/gallium/drivers/lima/ir/pp/codegen.h b/src/gallium/drivers/lima/ir/pp/codegen.h new file mode 100644 index 00000000000..ab80d392dc2 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/codegen.h @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2017 Lima Project + * Copyright (c) 2013 Ben Brewer (ben.brewer@codethink.co.uk) + * Copyright (c) 2013 Connor Abbott (connor@abbott.cx) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
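Each encoded instruction starts with a control word whose low 5 bits hold its length in 32-bit words, which is how ppir_codegen_print_prog() above steps through the stream. A sketch of the same walk; walk_pp_shader() and its parameters are hypothetical, and the stop-bit position assumes LSB-first packing of the ppir_codegen_ctrl bit-fields (the count field in the low bits is what the print helper relies on):

#include <stdint.h>
#include <stdio.h>

static void walk_pp_shader(const uint32_t *shader, unsigned size_in_words)
{
   unsigned offset = 0;

   while (offset < size_in_words) {
      unsigned count = shader[offset] & 0x1f;   /* ppir_codegen_ctrl.count */
      int stop = (shader[offset] >> 5) & 1;     /* assumed: next bit-field is stop */

      printf("instr at word %u: %u words%s\n",
             offset, count, stop ? " (end of shader)" : "");

      if (!count)
         break;   /* malformed stream, avoid looping forever */
      offset += count;
   }
}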
+ * + */ + +#ifndef LIMA_IR_PP_CODEGEN_H +#define LIMA_IR_PP_CODEGEN_H + +#include +#include + +/* Control */ + +typedef union __attribute__((__packed__)) { + struct __attribute__((__packed__)) { + unsigned count : 5; + bool stop : 1; + bool sync : 1; + unsigned fields : 12; + unsigned next_count : 6; + bool prefetch : 1; + unsigned unknown : 6; + }; + uint32_t mask; +} ppir_codegen_ctrl; + +typedef enum { + ppir_codegen_field_shift_varying = 0, + ppir_codegen_field_shift_sampler = 1, + ppir_codegen_field_shift_uniform = 2, + ppir_codegen_field_shift_vec4_mul = 3, + ppir_codegen_field_shift_float_mul = 4, + ppir_codegen_field_shift_vec4_acc = 5, + ppir_codegen_field_shift_float_acc = 6, + ppir_codegen_field_shift_combine = 7, + ppir_codegen_field_shift_temp_write = 8, + ppir_codegen_field_shift_branch = 9, + ppir_codegen_field_shift_vec4_const_0 = 10, + ppir_codegen_field_shift_vec4_const_1 = 11, + ppir_codegen_field_shift_count = 12, +} ppir_codegen_field_shift; + +/* Data Inputs */ + +typedef enum { + ppir_codegen_vec4_reg_frag_color = 0, + ppir_codegen_vec4_reg_constant0 = 12, + ppir_codegen_vec4_reg_constant1 = 13, + ppir_codegen_vec4_reg_texture = 14, + ppir_codegen_vec4_reg_uniform = 15, + ppir_codegen_vec4_reg_discard = 15, +} ppir_codegen_vec4_reg; + +typedef union __attribute__((__packed__)) { + struct __attribute__((__packed__)) { + unsigned perspective : 2; + unsigned source_type : 2; + unsigned unknown_0 : 1; /* = 0 */ + unsigned alignment : 2; + unsigned unknown_1 : 3; /* = 00 0 */ + unsigned offset_vector : 4; + unsigned unknown_2 : 2; /* = 00 */ + unsigned offset_scalar : 2; + unsigned index : 6; + ppir_codegen_vec4_reg dest : 4; + unsigned mask : 4; + unsigned unknown_3 : 2; /* = 00 */ + } imm; + struct __attribute__((__packed__)) { + unsigned perspective : 2; + unsigned source_type : 2; /* = 01 */ + unsigned unknown_0 : 2; /* = 00 */ + bool normalize : 1; + unsigned unknown_1 : 3; + ppir_codegen_vec4_reg source : 4; + bool negate : 1; + bool absolute : 1; + unsigned swizzle : 8; + ppir_codegen_vec4_reg dest : 4; + unsigned mask : 4; + unsigned unknown_2 : 2; /* = 00 */ + } reg; +} ppir_codegen_field_varying; + +typedef enum { + ppir_codegen_sampler_type_2d = 0x00, + ppir_codegen_sampler_type_cube = 0x1F, +} ppir_codegen_sampler_type; + +typedef struct __attribute__((__packed__)) { + unsigned lod_bias : 6; + unsigned index_offset : 6; + unsigned unknown_0 : 6; /* = 000000 */ + bool lod_bias_en : 1; + unsigned unknown_1 : 5; /* = 00000 */ + ppir_codegen_sampler_type type : 5; + bool offset_en : 1; + unsigned index : 12; + unsigned unknown_2 : 20; /* = 0011 1001 0000 0000 0001 */ +} ppir_codegen_field_sampler; + +typedef enum { + ppir_codegen_uniform_src_uniform = 0, + ppir_codegen_uniform_src_temporary = 3, +} ppir_codegen_uniform_src; + +typedef struct __attribute__((__packed__)) { + ppir_codegen_uniform_src source : 2; + unsigned unknown_0 : 8; /* = 00 0000 00 */ + unsigned alignment : 2; /* 00: float, 01: vec2, 10: vec4 */ + unsigned unknown_1 : 6; /* = 00 0000 */ + unsigned offset_reg : 6; + bool offset_en : 1; + unsigned index : 16; +} ppir_codegen_field_uniform; + +/* Vector Pipe */ + +typedef enum { + ppir_codegen_vec4_mul_op_not = 0x08, /* Logical Not */ + ppir_codegen_vec4_mul_op_and = 0x09, /* Logical AND */ + ppir_codegen_vec4_mul_op_or = 0x0A, /* Logical OR */ + ppir_codegen_vec4_mul_op_xor = 0x0B, /* Logical XOR */ + ppir_codegen_vec4_mul_op_ne = 0x0C, /* Not Equal */ + ppir_codegen_vec4_mul_op_gt = 0x0D, /* Great Than */ + ppir_codegen_vec4_mul_op_ge = 0x0E, /* 
Great than or Equal */ + ppir_codegen_vec4_mul_op_eq = 0x0F, /* Equal */ + ppir_codegen_vec4_mul_op_min = 0x10, /* Minimum */ + ppir_codegen_vec4_mul_op_max = 0x11, /* Maximum */ + ppir_codegen_vec4_mul_op_mov = 0x1F, /* Passthrough, result = arg1 */ +} ppir_codegen_vec4_mul_op; + +typedef enum { + ppir_codegen_outmod_none = 0, + ppir_codegen_outmod_clamp_fraction = 1, + ppir_codegen_outmod_clamp_positive = 2, + ppir_codegen_outmod_round = 3, +} ppir_codegen_outmod; + +typedef struct __attribute__((__packed__)) { + ppir_codegen_vec4_reg arg0_source : 4; + unsigned arg0_swizzle : 8; + bool arg0_absolute : 1; + bool arg0_negate : 1; + ppir_codegen_vec4_reg arg1_source : 4; + unsigned arg1_swizzle : 8; + bool arg1_absolute : 1; + bool arg1_negate : 1; + unsigned dest : 4; + unsigned mask : 4; + ppir_codegen_outmod dest_modifier : 2; + ppir_codegen_vec4_mul_op op : 5; +} ppir_codegen_field_vec4_mul; + +typedef enum { + ppir_codegen_vec4_acc_op_add = 0x00, + ppir_codegen_vec4_acc_op_fract = 0x04, /* Fract? */ + ppir_codegen_vec4_acc_op_ne = 0x08, /* Not Equal */ + ppir_codegen_vec4_acc_op_gt = 0x09, /* Great-Than */ + ppir_codegen_vec4_acc_op_ge = 0x0A, /* Great-than or Equal */ + ppir_codegen_vec4_acc_op_eq = 0x0B, /* Equal */ + ppir_codegen_vec4_acc_op_floor = 0x0C, + ppir_codegen_vec4_acc_op_ceil = 0x0D, + ppir_codegen_vec4_acc_op_min = 0x0E, + ppir_codegen_vec4_acc_op_max = 0x0F, + ppir_codegen_vec4_acc_op_sum3 = 0x10, /* dest.xyzw = (arg0.x + arg0.y + arg0.z) */ + ppir_codegen_vec4_acc_op_sum4 = 0x11, /* dest.xyzw = (arg0.x + arg0.y + arg0.z + arg0.w) */ + ppir_codegen_vec4_acc_op_dFdx = 0x14, + ppir_codegen_vec4_acc_op_dFdy = 0x15, + ppir_codegen_vec4_acc_op_sel = 0x17, /* result = (^fmul ? arg0 : arg1) */ + ppir_codegen_vec4_acc_op_mov = 0x1F, /* Passthrough, result = arg0 */ +} ppir_codegen_vec4_acc_op; + +typedef struct __attribute__((__packed__)) { + ppir_codegen_vec4_reg arg0_source : 4; + unsigned arg0_swizzle : 8; + bool arg0_absolute : 1; + bool arg0_negate : 1; + ppir_codegen_vec4_reg arg1_source : 4; + unsigned arg1_swizzle : 8; + bool arg1_absolute : 1; + bool arg1_negate : 1; + unsigned dest : 4; + unsigned mask : 4; + ppir_codegen_outmod dest_modifier : 2; + ppir_codegen_vec4_acc_op op : 5; + bool mul_in : 1; /* whether to get arg0 from multiply unit below */ +} ppir_codegen_field_vec4_acc; + +/* Float (Scalar) Pipe */ + +typedef enum { + ppir_codegen_float_mul_op_not = 0x08, /* Logical Not */ + ppir_codegen_float_mul_op_and = 0x09, /* Logical AND */ + ppir_codegen_float_mul_op_or = 0x0A, /* Logical OR */ + ppir_codegen_float_mul_op_xor = 0x0B, /* Logical XOR */ + ppir_codegen_float_mul_op_ne = 0x0C, /* Not Equal */ + ppir_codegen_float_mul_op_gt = 0x0D, /* Great Than */ + ppir_codegen_float_mul_op_ge = 0x0E, /* great than or Equal */ + ppir_codegen_float_mul_op_eq = 0x0F, /* Equal */ + ppir_codegen_float_mul_op_min = 0x10, /* Minimum */ + ppir_codegen_float_mul_op_max = 0x11, /* Maximum */ + ppir_codegen_float_mul_op_mov = 0x1F, /* Passthrough, result = arg1 */ +} ppir_codegen_float_mul_op; + +typedef struct __attribute__((__packed__)) { + unsigned arg0_source : 6; + bool arg0_absolute : 1; + bool arg0_negate : 1; + unsigned arg1_source : 6; + bool arg1_absolute : 1; + bool arg1_negate : 1; + unsigned dest : 6; + bool output_en : 1; /* Set to 0 when outputting directly to float_acc below. 
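The dest_modifier fields in the ALU encodings above all take the same ppir_codegen_outmod value. clamp_fraction is what the NIR saturate flag is mapped to later in nir.c, so it clamps to [0, 1]; the behaviour of clamp_positive and round is inferred here only from their names and from the ".pos"/".int" suffixes the disassembler prints, so treat this reading as an assumption. A sketch, for illustration only (apply_outmod_example is an invented name and assumes codegen.h is included):

#include <math.h>

static float apply_outmod_example(float x, ppir_codegen_outmod mod)
{
   switch (mod) {
   case ppir_codegen_outmod_clamp_fraction:   /* ".sat": clamp to [0, 1] */
      return fminf(fmaxf(x, 0.0f), 1.0f);
   case ppir_codegen_outmod_clamp_positive:   /* ".pos": clamp to [0, +inf), assumed */
      return fmaxf(x, 0.0f);
   case ppir_codegen_outmod_round:            /* ".int": round to an integer, assumed */
      return roundf(x);
   case ppir_codegen_outmod_none:
   default:
      return x;
   }
}
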
*/ + ppir_codegen_outmod dest_modifier : 2; + ppir_codegen_float_mul_op op : 5; +} ppir_codegen_field_float_mul; + +typedef enum { + ppir_codegen_float_acc_op_add = 0x00, + ppir_codegen_float_acc_op_fract = 0x04, + ppir_codegen_float_acc_op_ne = 0x08, /* Not Equal */ + ppir_codegen_float_acc_op_gt = 0x09, /* Great-Than */ + ppir_codegen_float_acc_op_ge = 0x0A, /* Great-than or Equal */ + ppir_codegen_float_acc_op_eq = 0x0B, /* Equal */ + ppir_codegen_float_acc_op_floor = 0x0C, + ppir_codegen_float_acc_op_ceil = 0x0D, + ppir_codegen_float_acc_op_min = 0x0E, + ppir_codegen_float_acc_op_max = 0x0F, + ppir_codegen_float_acc_op_dFdx = 0x14, + ppir_codegen_float_acc_op_dFdy = 0x15, + ppir_codegen_float_acc_op_sel = 0x17, /* result = (^fmul ? arg0 : arg1) */ + ppir_codegen_float_acc_op_mov = 0x1F, /* Passthrough, result = arg1 */ +} ppir_codegen_float_acc_op; + +typedef struct __attribute__((__packed__)) { + unsigned arg0_source : 6; + bool arg0_absolute : 1; + bool arg0_negate : 1; + unsigned arg1_source : 6; + bool arg1_absolute : 1; + bool arg1_negate : 1; + unsigned dest : 6; + bool output_en : 1; /* Always true */ + ppir_codegen_outmod dest_modifier : 2; + ppir_codegen_float_acc_op op : 5; + bool mul_in : 1; /* Get arg1 from float_mul above. */ +} ppir_codegen_field_float_acc; + +/* Temporary Write / Framebuffer Read */ + +typedef union __attribute__((__packed__)) { + struct __attribute__((__packed__)) { + unsigned dest : 2; /* = 11 */ + unsigned unknown_0 : 2; /* = 00 */ + unsigned source : 6; + unsigned alignment : 2; /* 0: float, 1:vec2, 2: vec4 */ + unsigned unknown_1 : 6; /* = 00 0000 */ + unsigned offset_reg : 6; + bool offset_en : 1; + unsigned index : 16; + } temp_write; + struct __attribute__((__packed__)) { + bool source : 1; /* 0 = fb_depth, 1 = fb_color */ + unsigned unknown_0 : 5; /* = 00 111 */ + unsigned dest : 4; + unsigned unknown_1 : 31; /* = 0 0000 ... 
10 */ + } fb_read; +} ppir_codegen_field_temp_write; + +/* Result combiner */ + +typedef enum { + ppir_codegen_combine_scalar_op_rcp = 0, /* Reciprocal */ + ppir_codegen_combine_scalar_op_mov = 1, /* No Operation */ + ppir_codegen_combine_scalar_op_sqrt = 2, /* Square-Root */ + ppir_codegen_combine_scalar_op_rsqrt = 3, /* Inverse Square-Root */ + ppir_codegen_combine_scalar_op_exp2 = 4, /* Binary Exponent */ + ppir_codegen_combine_scalar_op_log2 = 5, /* Binary Logarithm */ + ppir_codegen_combine_scalar_op_sin = 6, /* Sine (Scaled LUT) */ + ppir_codegen_combine_scalar_op_cos = 7, /* Cosine (Scaled LUT) */ + ppir_codegen_combine_scalar_op_atan = 8, /* Arc Tangent Part 1 */ + ppir_codegen_combine_scalar_op_atan2 = 9, /* Arc Tangent 2 Part 1 */ +} ppir_codegen_combine_scalar_op; + +typedef union __attribute__((__packed__)) { + struct __attribute__((__packed__)) { + bool dest_vec : 1; + bool arg1_en : 1; + ppir_codegen_combine_scalar_op op : 4; + bool arg1_absolute : 1; + bool arg1_negate : 1; + unsigned arg1_src : 6; + bool arg0_absolute : 1; + bool arg0_negate : 1; + unsigned arg0_src : 6; + ppir_codegen_outmod dest_modifier : 2; + unsigned dest : 6; + } scalar; + struct __attribute__((__packed__)) { + bool dest_vec : 1; + bool arg1_en : 1; + unsigned arg1_swizzle : 8; + unsigned arg1_source : 4; + unsigned padding_0 : 8; + unsigned mask : 4; + unsigned dest : 4; + } vector; +} ppir_codegen_field_combine; + +/* Branch/Control Flow */ + +#define PPIR_CODEGEN_DISCARD_WORD0 0x007F0003 +#define PPIR_CODEGEN_DISCARD_WORD1 0x00000000 +#define PPIR_CODEGEN_DISCARD_WORD2 0x000 + +typedef union __attribute__((__packed__)) { + struct __attribute__((__packed__)) { + unsigned unknown_0 : 4; /* = 0000 */ + unsigned arg1_source : 6; + unsigned arg0_source : 6; + bool cond_gt : 1; + bool cond_eq : 1; + bool cond_lt : 1; + unsigned unknown_1 : 22; /* = 0 0000 0000 0000 0000 0000 0 */ + signed target : 27; + unsigned unknown_2 : 5; /* = 0 0011 */ + } branch; + struct __attribute__((__packed__)) { + unsigned word0 : 32; + unsigned word1 : 32; + unsigned word2 : 9; + } discard; +} ppir_codegen_field_branch; + +void ppir_disassemble_instr(uint32_t *instr, unsigned offset); + +#endif diff --git a/src/gallium/drivers/lima/ir/pp/disasm.c b/src/gallium/drivers/lima/ir/pp/disasm.c new file mode 100644 index 00000000000..d2a3fb17d03 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/disasm.c @@ -0,0 +1,776 @@ +/* + * Copyright (c) 2018 Lima Project + * + * Copyright (c) 2013 Codethink (http://www.codethink.co.uk) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
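A note on the 8-bit swizzle fields used by the vector encodings in codegen.h above: each destination component gets a 2-bit source-lane selector, component x in the lowest two bits, so 0xE4 (binary 11 10 01 00) is the identity ".xyzw" that print_swizzle() below skips printing. A standalone restatement, for illustration only (the function name is invented):

#include <stdint.h>

static void decode_swizzle_example(uint8_t swizzle, char out[5])
{
   /* the lane selector for component i lives in bits [2*i+1 : 2*i] */
   for (unsigned i = 0; i < 4; i++, swizzle >>= 2)
      out[i] = "xyzw"[swizzle & 3];
   out[4] = '\0';
}

For example decode_swizzle_example(0xE4, buf) yields "xyzw", and 0x00 yields "xxxx" (broadcast x).
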
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "util/u_half.h" + +#include "ppir.h" +#include "codegen.h" + +typedef struct { + char *name; + unsigned srcs; +} asm_op; + +static void +print_swizzle(uint8_t swizzle) +{ + if (swizzle == 0xE4) + return; + + printf("."); + for (unsigned i = 0; i < 4; i++, swizzle >>= 2) + printf("%c", "xyzw"[swizzle & 3]); +} + +static void +print_mask(uint8_t mask) +{ + if (mask == 0xF) + return; + + printf("."); + if (mask & 1) printf("x"); + if (mask & 2) printf("y"); + if (mask & 4) printf("z"); + if (mask & 8) printf("w"); +} + +static void +print_reg(ppir_codegen_vec4_reg reg, const char *special) +{ + if (special) { + printf("%s", special); + } else { + switch (reg) + { + case ppir_codegen_vec4_reg_constant0: + printf("^const0"); + break; + case ppir_codegen_vec4_reg_constant1: + printf("^const1"); + break; + case ppir_codegen_vec4_reg_texture: + printf("^texture"); + break; + case ppir_codegen_vec4_reg_uniform: + printf("^uniform"); + break; + default: + printf("$%u", reg); + break; + } + } +} + +static void +print_vector_source(ppir_codegen_vec4_reg reg, const char *special, + uint8_t swizzle, bool abs, bool neg) +{ + if (neg) + printf("-"); + if (abs) + printf("abs("); + + print_reg(reg, special); + print_swizzle(swizzle); + + if (abs) + printf(")"); +} + +static void +print_source_scalar(unsigned reg, const char *special, bool abs, bool neg) +{ + if (neg) + printf("-"); + if (abs) + printf("abs("); + + print_reg(reg >> 2, special); + if (!special) + printf(".%c", "xyzw"[reg & 3]); + + if (abs) + printf(")"); +} + +static void +print_outmod(ppir_codegen_outmod modifier) +{ + switch (modifier) + { + case ppir_codegen_outmod_clamp_fraction: + printf(".sat"); + break; + case ppir_codegen_outmod_clamp_positive: + printf(".pos"); + break; + case ppir_codegen_outmod_round: + printf(".int"); + break; + default: + break; + } +} + +static void +print_dest_scalar(unsigned reg) +{ + printf("$%u", reg >> 2); + printf(".%c ", "xyzw"[reg & 3]); +} + +static void +print_const(unsigned const_num, uint16_t *val) +{ + printf("const%u", const_num); + for (unsigned i = 0; i < 4; i++) + printf(" %f", util_half_to_float(val[i])); +} + +static void +print_const0(void *code, unsigned offset) +{ + (void) offset; + + print_const(0, code); +} + +static void +print_const1(void *code, unsigned offset) +{ + (void) offset; + + print_const(1, code); +} + +static void +print_varying(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_varying *varying = code; + + printf("load"); + + bool perspective = varying->imm.source_type < 2 && varying->imm.perspective; + if (perspective) + { + printf(".perspective"); + switch (varying->imm.perspective) + { + case 2: + printf(".z"); + break; + case 3: + printf(".w"); + break; + default: + printf(".unknown"); + break; + } + } + + printf(".v "); + + switch (varying->imm.dest) + { + case ppir_codegen_vec4_reg_discard: + printf("^discard"); + break; + default: + printf("$%u", varying->imm.dest); + break; + } + print_mask(varying->imm.mask); + printf(" "); + + switch (varying->imm.source_type) { + case 1: + print_vector_source(varying->reg.source, NULL, varying->reg.swizzle, + varying->reg.absolute, varying->reg.negate); + break; + case 2: + printf("gl_FragCoord"); + break; + case 3: + if 
(varying->imm.perspective) + printf("gl_FrontFacing"); + else + printf("gl_PointCoord"); + break; + default: + switch (varying->imm.alignment) { + case 0: + printf("%u.%c", varying->imm.index >> 2, + "xyzw"[varying->imm.index & 3]); + break; + case 1: { + const char *c[2] = {"xy", "zw"}; + printf("%u.%s", varying->imm.index >> 1, c[varying->imm.index & 1]); + break; + } + default: + printf("%u", varying->imm.index); + break; + } + + if (varying->imm.offset_vector != 15) { + unsigned reg = (varying->imm.offset_vector << 2) + + varying->imm.offset_scalar; + printf("+"); + print_source_scalar(reg, NULL, false, false); + } + break; + } +} + +static void +print_sampler(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_sampler *sampler = code; + + printf("texld"); + if (sampler->lod_bias_en) + printf(".b"); + + switch (sampler->type) { + case ppir_codegen_sampler_type_2d: + printf(".2d"); + break; + case ppir_codegen_sampler_type_cube: + printf(".cube"); + break; + default: + printf("_t%u", sampler->type); + break; + } + + printf(" %u", sampler->index); + + if (sampler->offset_en) + { + printf("+"); + print_source_scalar(sampler->index_offset, NULL, false, false); + } + + if (sampler->lod_bias_en) + { + printf(" "); + print_source_scalar(sampler->lod_bias, NULL, false, false); + } +} + +static void +print_uniform(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_uniform *uniform = code; + + printf("load."); + + switch (uniform->source) { + case ppir_codegen_uniform_src_uniform: + printf("u"); + break; + case ppir_codegen_uniform_src_temporary: + printf("t"); + break; + default: + printf(".u%u", uniform->source); + break; + } + + if (uniform->alignment) + printf(" %u", uniform->index); + else + printf(" %u.%c", uniform->index >> 2, "xyzw"[uniform->index & 3]); + + if (uniform->offset_en) { + printf(" "); + print_source_scalar(uniform->offset_reg, NULL, false, false); + } +} + +#define CASE(_name, _srcs) \ +[ppir_codegen_vec4_mul_op_##_name] = { \ + .name = #_name, \ + .srcs = _srcs \ +} + +static const asm_op vec4_mul_ops[] = { + [0 ... 
7] = { + .name = "mul", + .srcs = 2 + }, + CASE(not, 1), + CASE(and, 2), + CASE(or, 2), + CASE(xor, 2), + CASE(ne, 2), + CASE(gt, 2), + CASE(ge, 2), + CASE(eq, 2), + CASE(min, 2), + CASE(max, 2), + CASE(mov, 1), +}; + +#undef CASE + +static void +print_vec4_mul(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_vec4_mul *vec4_mul = code; + + asm_op op = vec4_mul_ops[vec4_mul->op]; + + if (op.name) + printf("%s", op.name); + else + printf("op%u", vec4_mul->op); + print_outmod(vec4_mul->dest_modifier); + printf(".v0 "); + + if (vec4_mul->mask) { + printf("$%u", vec4_mul->dest); + print_mask(vec4_mul->mask); + printf(" "); + } + + print_vector_source(vec4_mul->arg0_source, NULL, + vec4_mul->arg0_swizzle, + vec4_mul->arg0_absolute, + vec4_mul->arg0_negate); + + if (vec4_mul->op < 8 && vec4_mul->op != 0) { + printf("<<%u", vec4_mul->op); + } + + printf(" "); + + if (op.srcs > 1) { + print_vector_source(vec4_mul->arg1_source, NULL, + vec4_mul->arg1_swizzle, + vec4_mul->arg1_absolute, + vec4_mul->arg1_negate); + } +} + +#define CASE(_name, _srcs) \ +[ppir_codegen_vec4_acc_op_##_name] = { \ + .name = #_name, \ + .srcs = _srcs \ +} + +static const asm_op vec4_acc_ops[] = { + CASE(add, 2), + CASE(fract, 1), + CASE(ne, 2), + CASE(gt, 2), + CASE(ge, 2), + CASE(eq, 2), + CASE(floor, 1), + CASE(ceil, 1), + CASE(min, 2), + CASE(max, 2), + CASE(sum3, 1), + CASE(sum4, 1), + CASE(dFdx, 2), + CASE(dFdy, 2), + CASE(sel, 2), + CASE(mov, 1), +}; + +#undef CASE + +static void +print_vec4_acc(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_vec4_acc *vec4_acc = code; + + asm_op op = vec4_acc_ops[vec4_acc->op]; + + if (op.name) + printf("%s", op.name); + else + printf("op%u", vec4_acc->op); + print_outmod(vec4_acc->dest_modifier); + printf(".v1 "); + + if (vec4_acc->mask) { + printf("$%u", vec4_acc->dest); + print_mask(vec4_acc->mask); + printf(" "); + } + + print_vector_source(vec4_acc->arg0_source, vec4_acc->mul_in ? "^v0" : NULL, + vec4_acc->arg0_swizzle, + vec4_acc->arg0_absolute, + vec4_acc->arg0_negate); + + if (op.srcs > 1) { + printf(" "); + print_vector_source(vec4_acc->arg1_source, NULL, + vec4_acc->arg1_swizzle, + vec4_acc->arg1_absolute, + vec4_acc->arg1_negate); + } +} + +#define CASE(_name, _srcs) \ +[ppir_codegen_float_mul_op_##_name] = { \ + .name = #_name, \ + .srcs = _srcs \ +} + +static const asm_op float_mul_ops[] = { + [0 ... 
7] = { + .name = "mul", + .srcs = 2 + }, + CASE(not, 1), + CASE(and, 2), + CASE(or, 2), + CASE(xor, 2), + CASE(ne, 2), + CASE(gt, 2), + CASE(ge, 2), + CASE(eq, 2), + CASE(min, 2), + CASE(max, 2), + CASE(mov, 1), +}; + +#undef CASE + +static void +print_float_mul(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_float_mul *float_mul = code; + + asm_op op = float_mul_ops[float_mul->op]; + + if (op.name) + printf("%s", op.name); + else + printf("op%u", float_mul->op); + print_outmod(float_mul->dest_modifier); + printf(".s0 "); + + if (float_mul->output_en) + print_dest_scalar(float_mul->dest); + + print_source_scalar(float_mul->arg0_source, NULL, + float_mul->arg0_absolute, + float_mul->arg0_negate); + + if (float_mul->op < 8 && float_mul->op != 0) { + printf("<<%u", float_mul->op); + } + + if (op.srcs > 1) { + printf(" "); + + print_source_scalar(float_mul->arg1_source, NULL, + float_mul->arg1_absolute, + float_mul->arg1_negate); + } +} + +#define CASE(_name, _srcs) \ +[ppir_codegen_float_acc_op_##_name] = { \ + .name = #_name, \ + .srcs = _srcs \ +} + +static const asm_op float_acc_ops[] = { + CASE(add, 2), + CASE(fract, 1), + CASE(ne, 2), + CASE(gt, 2), + CASE(ge, 2), + CASE(eq, 2), + CASE(floor, 1), + CASE(ceil, 1), + CASE(min, 2), + CASE(max, 2), + CASE(dFdx, 2), + CASE(dFdy, 2), + CASE(sel, 2), + CASE(mov, 1), +}; + +#undef CASE + +static void +print_float_acc(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_float_acc *float_acc = code; + + asm_op op = float_acc_ops[float_acc->op]; + + if (op.name) + printf("%s", op.name); + else + printf("op%u", float_acc->op); + print_outmod(float_acc->dest_modifier); + printf(".s1 "); + + if (float_acc->output_en) + print_dest_scalar(float_acc->dest); + + print_source_scalar(float_acc->arg0_source, float_acc->mul_in ? "^s0" : NULL, + float_acc->arg0_absolute, + float_acc->arg0_negate); + + if (op.srcs > 1) { + printf(" "); + print_source_scalar(float_acc->arg1_source, NULL, + float_acc->arg1_absolute, + float_acc->arg1_negate); + } +} + +#define CASE(_name, _srcs) \ +[ppir_codegen_combine_scalar_op_##_name] = { \ + .name = #_name, \ + .srcs = _srcs \ +} + +static const asm_op combine_ops[] = { + CASE(rcp, 1), + CASE(mov, 1), + CASE(sqrt, 1), + CASE(rsqrt, 1), + CASE(exp2, 1), + CASE(log2, 1), + CASE(sin, 1), + CASE(cos, 1), + CASE(atan, 1), + CASE(atan2, 1), +}; + +#undef CASE + +static void +print_combine(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_combine *combine = code; + + if (combine->scalar.dest_vec && + combine->scalar.arg1_en) { + /* This particular combination can only be valid for scalar * vector + * multiplies, and the opcode field is reused for something else. 
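Throughout this disassembler the scalar pipes' 6-bit operand fields are printed by print_source_scalar()/print_dest_scalar() as one component of a vec4 register: the upper four bits select the register and the low two bits select the component. A standalone restatement, for illustration only (the function name is invented):

#include <stdio.h>

static void print_scalar_operand_example(unsigned operand /* 6 bits */)
{
   unsigned reg = operand >> 2;        /* vec4 register, $0..$15 */
   unsigned component = operand & 3;   /* 0..3 -> x, y, z, w */

   printf("$%u.%c", reg, "xyzw"[component]);
}

For example operand 0x1A prints "$6.z".
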
+ */ + printf("mul"); + } else { + asm_op op = combine_ops[combine->scalar.op]; + + if (op.name) + printf("%s", op.name); + else + printf("op%u", combine->scalar.op); + } + + if (!combine->scalar.dest_vec) + print_outmod(combine->scalar.dest_modifier); + printf(".s2 "); + + if (combine->scalar.dest_vec) { + printf("$%u", combine->vector.dest); + print_mask(combine->vector.mask); + } else { + print_dest_scalar(combine->scalar.dest); + } + printf(" "); + + print_source_scalar(combine->scalar.arg0_src, NULL, + combine->scalar.arg0_absolute, + combine->scalar.arg0_negate); + printf(" "); + + if (combine->scalar.arg1_en) { + if (combine->scalar.dest_vec) { + print_vector_source(combine->vector.arg1_source, NULL, + combine->vector.arg1_swizzle, + false, false); + } else { + print_source_scalar(combine->scalar.arg1_src, NULL, + combine->scalar.arg1_absolute, + combine->scalar.arg1_negate); + } + } +} + +static void +print_temp_write(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_temp_write *temp_write = code; + + if (temp_write->fb_read.unknown_0 == 0x7) { + if (temp_write->fb_read.source) + printf("fb_color"); + else + printf("fb_depth"); + printf(" $%u", temp_write->fb_read.dest); + + return; + } + + printf("store.t"); + + if (temp_write->temp_write.alignment) { + printf(" %u", temp_write->temp_write.index); + } else { + printf(" %u.%c", temp_write->temp_write.index >> 2, + "xyzw"[temp_write->temp_write.index & 3]); + } + + if (temp_write->temp_write.offset_en) { + printf("+"); + print_source_scalar(temp_write->temp_write.offset_reg, + NULL, false, false); + } + + printf(" "); + + if (temp_write->temp_write.alignment) { + print_reg(temp_write->temp_write.source >> 2, NULL); + } else { + print_source_scalar(temp_write->temp_write.source, NULL, false, false); + } +} + +static void +print_branch(void *code, unsigned offset) +{ + ppir_codegen_field_branch *branch = code; + + if (branch->discard.word0 == PPIR_CODEGEN_DISCARD_WORD0 && + branch->discard.word1 == PPIR_CODEGEN_DISCARD_WORD1 && + branch->discard.word2 == PPIR_CODEGEN_DISCARD_WORD2) { + printf("discard"); + return; + } + + + const char* cond[] = { + "nv", "lt", "eq", "le", + "gt", "ne", "ge", "" , + }; + + unsigned cond_mask = 0; + cond_mask |= (branch->branch.cond_lt ? 1 : 0); + cond_mask |= (branch->branch.cond_eq ? 2 : 0); + cond_mask |= (branch->branch.cond_gt ? 
4 : 0); + printf("branch"); + if (cond_mask != 0x7) { + printf(".%s ", cond[cond_mask]); + print_source_scalar(branch->branch.arg0_source, NULL, false, false); + printf(" "); + print_source_scalar(branch->branch.arg1_source, NULL, false, false); + } + + printf(" %d", branch->branch.target + offset); +} + +typedef void (*print_field_func)(void *, unsigned); + +static const print_field_func print_field[ppir_codegen_field_shift_count] = { + [ppir_codegen_field_shift_varying] = print_varying, + [ppir_codegen_field_shift_sampler] = print_sampler, + [ppir_codegen_field_shift_uniform] = print_uniform, + [ppir_codegen_field_shift_vec4_mul] = print_vec4_mul, + [ppir_codegen_field_shift_float_mul] = print_float_mul, + [ppir_codegen_field_shift_vec4_acc] = print_vec4_acc, + [ppir_codegen_field_shift_float_acc] = print_float_acc, + [ppir_codegen_field_shift_combine] = print_combine, + [ppir_codegen_field_shift_temp_write] = print_temp_write, + [ppir_codegen_field_shift_branch] = print_branch, + [ppir_codegen_field_shift_vec4_const_0] = print_const0, + [ppir_codegen_field_shift_vec4_const_1] = print_const1, +}; + +static const int ppir_codegen_field_size[] = { + 34, 62, 41, 43, 30, 44, 31, 30, 41, 73, 64, 64 +}; + +static void +bitcopy(char *src, char *dst, unsigned bits, unsigned src_offset) +{ + src += src_offset / 8; + src_offset %= 8; + + for (int b = bits; b > 0; b -= 8, src++, dst++) { + unsigned char out = ((unsigned char) *src) >> src_offset; + if (src_offset > 0 && src_offset + b > 8) + out |= ((unsigned char) *(src + 1)) << (8 - src_offset); + *dst = (char) out; + } +} + +void +ppir_disassemble_instr(uint32_t *instr, unsigned offset) +{ + ppir_codegen_ctrl *ctrl = (ppir_codegen_ctrl *) instr; + + char *instr_code = (char *) (instr + 1); + unsigned bit_offset = 0; + bool first = true; + for (unsigned i = 0; i < ppir_codegen_field_shift_count; i++) { + char code[12]; + + if (!((ctrl->fields >> i) & 1)) + continue; + + unsigned bits = ppir_codegen_field_size[i]; + bitcopy(instr_code, code, bits, bit_offset); + + if (first) + first = false; + else + printf(", "); + + print_field[i](code, offset); + + bit_offset += bits; + } + + if (ctrl->sync) + printf(", sync"); + if (ctrl->stop) + printf(", stop"); + + printf("\n"); +} + diff --git a/src/gallium/drivers/lima/ir/pp/instr.c b/src/gallium/drivers/lima/ir/pp/instr.c new file mode 100644 index 00000000000..ae296a4bb82 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/instr.c @@ -0,0 +1,311 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
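The decode loop at the end of disasm.c above relies on the layout that codegen.c produces: after the 32-bit control word, only the fields whose bit is set in ctrl->fields are present, packed back to back in ascending bit order, each with the fixed width from the size table. A decoder therefore only has to accumulate a running bit offset, as in this sketch (illustration only; field_width[] stands in for the ppir_codegen_field_size[] table):

#include <stdint.h>

static unsigned field_bit_offset_example(uint32_t fields_mask,
                                         const int *field_width,
                                         unsigned field)
{
   unsigned offset = 0;   /* in bits, counted from just after the ctrl word */

   for (unsigned i = 0; i < field; i++) {
      if ((fields_mask >> i) & 1)
         offset += field_width[i];
   }
   return offset;
}
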
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "util/ralloc.h" + +#include "ppir.h" + +ppir_instr *ppir_instr_create(ppir_block *block) +{ + ppir_instr *instr = rzalloc(block, ppir_instr); + if (!instr) + return NULL; + + list_inithead(&instr->succ_list); + list_inithead(&instr->pred_list); + + instr->index = block->comp->cur_instr_index++; + instr->reg_pressure = -1; + + list_addtail(&instr->list, &block->instr_list); + return instr; +} + +void ppir_instr_add_dep(ppir_instr *succ, ppir_instr *pred) +{ + /* don't add duplicated instr */ + ppir_instr_foreach_pred(succ, dep) { + if (pred == dep->pred) + return; + } + + ppir_dep *dep = ralloc(succ, ppir_dep); + dep->pred = pred; + dep->succ = succ; + list_addtail(&dep->pred_link, &succ->pred_list); + list_addtail(&dep->succ_link, &pred->succ_list); +} + +void ppir_instr_insert_mul_node(ppir_node *add, ppir_node *mul) +{ + ppir_instr *instr = add->instr; + int pos = mul->instr_pos; + int *slots = ppir_op_infos[mul->op].slots; + + for (int i = 0; slots[i] != PPIR_INSTR_SLOT_END; i++) { + /* possible to insert at required place */ + if (slots[i] == pos) { + if (!instr->slots[pos]) { + ppir_alu_node *add_alu = ppir_node_to_alu(add); + ppir_alu_node *mul_alu = ppir_node_to_alu(mul); + ppir_dest *dest = &mul_alu->dest; + int pipeline = pos == PPIR_INSTR_SLOT_ALU_VEC_MUL ? + ppir_pipeline_reg_vmul : ppir_pipeline_reg_fmul; + + /* ^vmul/^fmul can't be used as last arg */ + if (add_alu->num_src > 1) { + ppir_src *last_src = add_alu->src + add_alu->num_src - 1; + if (ppir_node_target_equal(last_src, dest)) + return; + } + + /* update add node src to use pipeline reg */ + ppir_src *src = add_alu->src; + if (add_alu->num_src == 3) { + if (ppir_node_target_equal(src, dest)) { + src->type = ppir_target_pipeline; + src->pipeline = pipeline; + } + + if (ppir_node_target_equal(++src, dest)) { + src->type = ppir_target_pipeline; + src->pipeline = pipeline; + } + } + else { + assert(ppir_node_target_equal(src, dest)); + src->type = ppir_target_pipeline; + src->pipeline = pipeline; + } + + /* update mul node dest to output to pipeline reg */ + dest->type = ppir_target_pipeline; + dest->pipeline = pipeline; + + instr->slots[pos] = mul; + mul->instr = instr; + } + return; + } + } +} + +/* check whether a const slot fix into another const slot */ +static bool ppir_instr_insert_const(ppir_const *dst, const ppir_const *src, + uint8_t *swizzle) +{ + int i, j; + + for (i = 0; i < src->num; i++) { + for (j = 0; j < dst->num; j++) { + if (src->value[i].ui == dst->value[j].ui) + break; + } + + if (j == dst->num) { + if (dst->num == 4) + return false; + dst->value[dst->num++] = src->value[i]; + } + + swizzle[i] = j; + } + + return true; +} + +/* make alu node src reflact the pipeline reg */ +static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipeline, + ppir_dest *dest, uint8_t *swizzle) +{ + for (int i = PPIR_INSTR_SLOT_ALU_START; i <= PPIR_INSTR_SLOT_ALU_END; i++) { + if (!instr->slots[i]) + continue; + + ppir_alu_node *alu = ppir_node_to_alu(instr->slots[i]); + for (int j = 0; j < alu->num_src; j++) { + ppir_src *src = alu->src + j; + if (ppir_node_target_equal(src, dest)) { + src->type = ppir_target_pipeline; + src->pipeline = pipeline; + + if (swizzle) { + for (int k = 0; k < 4; k++) + 
src->swizzle[k] = swizzle[src->swizzle[k]]; + } + } + } + } +} + +bool ppir_instr_insert_node(ppir_instr *instr, ppir_node *node) +{ + if (node->op == ppir_op_const) { + int i; + ppir_const_node *c = ppir_node_to_const(node); + const ppir_const *nc = &c->constant; + + for (i = 0; i < 2; i++) { + ppir_const ic = instr->constant[i]; + uint8_t swizzle[4] = {0}; + + if (ppir_instr_insert_const(&ic, nc, swizzle)) { + instr->constant[i] = ic; + ppir_instr_update_src_pipeline( + instr, ppir_pipeline_reg_const0 + i, &c->dest, swizzle); + break; + } + } + + /* no const slot can insert */ + if (i == 2) + return false; + + return true; + } + else { + int *slots = ppir_op_infos[node->op].slots; + for (int i = 0; slots[i] != PPIR_INSTR_SLOT_END; i++) { + int pos = slots[i]; + + if (instr->slots[pos]) { + /* node already in this instr, i.e. load_uniform */ + if (instr->slots[pos] == node) + return true; + else + continue; + } + + if (pos == PPIR_INSTR_SLOT_ALU_SCL_MUL || + pos == PPIR_INSTR_SLOT_ALU_SCL_ADD) { + ppir_dest *dest = ppir_node_get_dest(node); + if (!ppir_target_is_scaler(dest)) + continue; + } + + instr->slots[pos] = node; + node->instr = instr; + node->instr_pos = pos; + + if ((node->op == ppir_op_load_uniform) || (node->op == ppir_op_load_temp)) { + ppir_load_node *l = ppir_node_to_load(node); + ppir_instr_update_src_pipeline( + instr, ppir_pipeline_reg_uniform, &l->dest, NULL); + } + + return true; + } + + return false; + } +} + +static struct { + int len; + char *name; +} ppir_instr_fields[] = { + [PPIR_INSTR_SLOT_VARYING] = { 4, "vary" }, + [PPIR_INSTR_SLOT_TEXLD] = { 4, "texl"}, + [PPIR_INSTR_SLOT_UNIFORM] = { 4, "unif" }, + [PPIR_INSTR_SLOT_ALU_VEC_MUL] = { 4, "vmul" }, + [PPIR_INSTR_SLOT_ALU_SCL_MUL] = { 4, "smul" }, + [PPIR_INSTR_SLOT_ALU_VEC_ADD] = { 4, "vadd" }, + [PPIR_INSTR_SLOT_ALU_SCL_ADD] = { 4, "sadd" }, + [PPIR_INSTR_SLOT_ALU_COMBINE] = { 4, "comb" }, + [PPIR_INSTR_SLOT_STORE_TEMP] = { 4, "stor" }, +}; + +void ppir_instr_print_list(ppir_compiler *comp) +{ + if (!(lima_debug & LIMA_DEBUG_PP)) + return; + + printf("======ppir instr list======\n"); + printf(" "); + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) + printf("%-*s ", ppir_instr_fields[i].len, ppir_instr_fields[i].name); + printf("const0|1\n"); + + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + printf("%c%03d: ", instr->is_end ? '*' : ' ', instr->index); + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { + ppir_node *node = instr->slots[i]; + if (node) + printf("%-*d ", ppir_instr_fields[i].len, node->index); + else + printf("%-*s ", ppir_instr_fields[i].len, "null"); + } + for (int i = 0; i < 2; i++) { + if (i) + printf("| "); + + for (int j = 0; j < instr->constant[i].num; j++) + printf("%f ", instr->constant[i].value[j].f); + } + printf("\n"); + } + printf("------------------------\n"); + } +} + +static void ppir_instr_print_sub(ppir_instr *instr) +{ + printf("[%s%d", + instr->printed && !ppir_instr_is_leaf(instr) ? 
"+" : "", + instr->index); + + if (!instr->printed) { + ppir_instr_foreach_pred(instr, dep) { + ppir_instr_print_sub(dep->pred); + } + + instr->printed = true; + } + + printf("]"); +} + +void ppir_instr_print_dep(ppir_compiler *comp) +{ + if (!(lima_debug & LIMA_DEBUG_PP)) + return; + + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + instr->printed = false; + } + } + + printf("======ppir instr depend======\n"); + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + if (ppir_instr_is_root(instr)) { + ppir_instr_print_sub(instr); + printf("\n"); + } + } + printf("------------------------\n"); + } +} diff --git a/src/gallium/drivers/lima/ir/pp/lower.c b/src/gallium/drivers/lima/ir/pp/lower.c new file mode 100644 index 00000000000..e294f6740d1 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/lower.c @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "util/bitscan.h" +#include "util/ralloc.h" + +#include "ppir.h" + +static bool ppir_lower_const(ppir_block *block, ppir_node *node) +{ + if (ppir_node_is_root(node)) { + ppir_node_delete(node); + return true; + } + + ppir_node *move = NULL; + ppir_dest *dest = ppir_node_get_dest(node); + + /* const (register) can only be used in alu node, create a move + * node for other types of node */ + ppir_node_foreach_succ_safe(node, dep) { + ppir_node *succ = dep->succ; + + if (succ->type != ppir_node_type_alu) { + if (!move) { + move = ppir_node_create(block, ppir_op_mov, -1, 0); + if (unlikely(!move)) + return false; + + ppir_debug("lower const create move %d for %d\n", + move->index, node->index); + + ppir_alu_node *alu = ppir_node_to_alu(move); + alu->dest = *dest; + alu->num_src = 1; + ppir_node_target_assign(alu->src, dest); + for (int i = 0; i < 4; i++) + alu->src->swizzle[i] = i; + } + + ppir_node_replace_pred(dep, move); + ppir_node_replace_child(succ, node, move); + } + } + + if (move) { + ppir_node_add_dep(move, node); + list_addtail(&move->list, &node->list); + } + + return true; +} + +/* lower dot to mul+sum */ +static bool ppir_lower_dot(ppir_block *block, ppir_node *node) +{ + ppir_alu_node *mul = ppir_node_create(block, ppir_op_mul, -1, 0); + if (!mul) + return false; + list_addtail(&mul->node.list, &node->list); + + ppir_alu_node *dot = ppir_node_to_alu(node); + mul->src[0] = dot->src[0]; + mul->src[1] = dot->src[1]; + mul->num_src = 2; + + int num_components = node->op - ppir_op_dot2 + 2; + ppir_dest *dest = &mul->dest; + dest->type = ppir_target_ssa; + dest->ssa.num_components = num_components; + dest->ssa.live_in = INT_MAX; + dest->ssa.live_out = 0; + dest->write_mask = u_bit_consecutive(0, num_components); + + ppir_node_foreach_pred_safe(node, dep) { + ppir_node_remove_dep(dep); + ppir_node_add_dep(&mul->node, dep->pred); + } + ppir_node_add_dep(node, &mul->node); + + if (node->op == ppir_op_dot2) { + node->op = ppir_op_add; + + ppir_node_target_assign(dot->src, dest); + dot->src[0].swizzle[0] = 0; + dot->src[0].absolute = false; + dot->src[0].negate = false; + + ppir_node_target_assign(dot->src + 1, dest); + dot->src[1].swizzle[0] = 1; + dot->src[1].absolute = false; + dot->src[1].negate = false; + } + else { + node->op = node->op == ppir_op_dot3 ? 
ppir_op_sum3 : ppir_op_sum4; + + ppir_node_target_assign(dot->src, dest); + for (int i = 0; i < 4; i++) + dot->src[0].swizzle[i] = i; + dot->src[0].absolute = false; + dot->src[0].negate = false; + + dot->num_src = 1; + } + + return true; +} + +static ppir_reg *create_reg(ppir_compiler *comp, int num_components) +{ + ppir_reg *r = rzalloc(comp, ppir_reg); + if (!r) + return NULL; + + r->num_components = num_components; + r->live_in = INT_MAX; + r->live_out = 0; + r->is_head = false; + list_addtail(&r->list, &comp->reg_list); + + return r; +} + +/* lower vector alu node to multi scalar nodes */ +static bool ppir_lower_vec_to_scalar(ppir_block *block, ppir_node *node) +{ + ppir_alu_node *alu = ppir_node_to_alu(node); + ppir_dest *dest = &alu->dest; + + int n = 0; + int index[4]; + + unsigned mask = dest->write_mask; + while (mask) + index[n++] = u_bit_scan(&mask); + + if (n == 1) + return true; + + ppir_reg *r; + /* we need a reg for scalar nodes to store output */ + if (dest->type == ppir_target_register) + r = dest->reg; + else { + r = create_reg(block->comp, n); + if (!r) + return false; + + /* change all successors to use reg r */ + ppir_node_foreach_succ(node, dep) { + ppir_node *succ = dep->succ; + if (succ->type == ppir_node_type_alu) { + ppir_alu_node *sa = ppir_node_to_alu(succ); + for (int i = 0; i < sa->num_src; i++) { + ppir_src *src = sa->src + i; + if (ppir_node_target_equal(src, dest)) { + src->type = ppir_target_register; + src->reg = r; + } + } + } + else { + assert(succ->type == ppir_node_type_store); + ppir_store_node *ss = ppir_node_to_store(succ); + ppir_src *src = &ss->src; + src->type = ppir_target_register; + src->reg = r; + } + } + } + + /* create each component's scalar node */ + for (int i = 0; i < n; i++) { + ppir_node *s = ppir_node_create(block, node->op, -1, 0); + if (!s) + return false; + list_addtail(&s->list, &node->list); + + ppir_alu_node *sa = ppir_node_to_alu(s); + ppir_dest *sd = &sa->dest; + sd->type = ppir_target_register; + sd->reg = r; + sd->modifier = dest->modifier; + sd->write_mask = 1 << index[i]; + + for (int j = 0; j < alu->num_src; j++) + sa->src[j] = alu->src[j]; + sa->num_src = alu->num_src; + + /* TODO: need per reg component dependancy */ + ppir_node_foreach_succ(node, dep) { + ppir_node_add_dep(dep->succ, s); + } + + ppir_node_foreach_pred(node, dep) { + ppir_node_add_dep(s, dep->pred); + } + } + + ppir_node_delete(node); + return true; +} + +static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node) +{ + /* swapped op must be the next op */ + node->op++; + + assert(node->type == ppir_node_type_alu); + ppir_alu_node *alu = ppir_node_to_alu(node); + assert(alu->num_src == 2); + + ppir_src tmp = alu->src[0]; + alu->src[0] = alu->src[1]; + alu->src[1] = tmp; + return true; +} + +static bool ppir_lower_texture(ppir_block *block, ppir_node *node) +{ + ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node); + + if (ppir_node_has_single_pred(node)) { + ppir_node *pred = ppir_node_first_pred(node); + if (pred->op == ppir_op_load_varying) { + /* If ldtex is the only successor of load_varying node + * we're good. Just change load_varying op type to load_coords. 
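The heart of ppir_lower_vec_to_scalar() above is "emit one scalar node per set bit of the destination write mask", with all the scalar results funnelled through a shared register. For illustration only, the mask walk on its own (the names and the callback are invented; __builtin_ctz plays the role of u_bit_scan()):

#include <stdbool.h>

typedef bool (*emit_scalar_fn)(void *ctx, int component);

static bool for_each_written_component_example(void *ctx, unsigned write_mask,
                                               emit_scalar_fn emit)
{
   /* write_mask bit i set means component i is written (x=1, y=2, z=4, w=8) */
   while (write_mask) {
      int component = __builtin_ctz(write_mask);
      write_mask &= write_mask - 1;           /* clear the lowest set bit */

      if (!emit(ctx, component))
         return false;
   }
   return true;
}
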
+ */ + if (ppir_node_has_single_succ(pred)) { + pred->op = ppir_op_load_coords; + return true; + } + } + } + + /* Otherwise we need to create load_coords node */ + ppir_load_node *load = ppir_node_create(block, ppir_op_load_coords, -1, 0); + if (!load) + return false; + list_addtail(&load->node.list, &node->list); + + ppir_debug("%s create load_coords node %d for %d\n", + __FUNCTION__, load->node.index, node->index); + + ppir_dest *dest = &load->dest; + dest->type = ppir_target_ssa; + dest->ssa.num_components = load_tex->src_coords.ssa->num_components; + dest->ssa.live_in = INT_MAX; + dest->ssa.live_out = 0; + dest->write_mask = u_bit_consecutive(0, dest->ssa.num_components); + + load->src = load_tex->src_coords; + + ppir_src *src = &load_tex->src_coords; + src->type = ppir_target_ssa; + src->ssa = &dest->ssa; + + ppir_node_foreach_pred_safe(node, dep) { + ppir_node *pred = dep->pred; + ppir_node_remove_dep(dep); + ppir_node_add_dep(&load->node, pred); + } + + ppir_node_add_dep(node, &load->node); + return true; +} + +/* Prepare for sin and cos and then lower vector alu node to multi + * scalar nodes */ +static bool ppir_lower_sin_cos_vec_to_scalar(ppir_block *block, ppir_node *node) +{ + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_node *inv_2pi_node = ppir_node_create(block, ppir_op_const, -1, 0); + if (!inv_2pi_node) + return false; + list_addtail(&inv_2pi_node->list, &node->list); + + /* For sin and cos, the input has to multiplied by the constant + * 1/(2*pi), presumably to simplify the hardware. */ + ppir_const_node *inv_2pi_const = ppir_node_to_const(inv_2pi_node); + inv_2pi_const->constant.num = 1; + inv_2pi_const->constant.value[0].f = (1.0f/(2.0f * M_PI)); + + inv_2pi_const->dest.type = ppir_target_ssa; + inv_2pi_const->dest.ssa.num_components = 1; + inv_2pi_const->dest.ssa.live_in = INT_MAX; + inv_2pi_const->dest.ssa.live_out = 0; + inv_2pi_const->dest.write_mask = 0x01; + + ppir_node *mul_node = ppir_node_create(block, ppir_op_mul, -1, 0); + if (!mul_node) + return false; + list_addtail(&mul_node->list, &node->list); + + ppir_alu_node *mul_alu = ppir_node_to_alu(mul_node); + mul_alu->num_src = 2; + mul_alu->src[0] = alu->src[0]; + mul_alu->src[1].type = ppir_target_ssa; + mul_alu->src[1].ssa = &inv_2pi_const->dest.ssa; + + int num_components = alu->src[0].ssa->num_components; + mul_alu->dest.type = ppir_target_ssa; + mul_alu->dest.ssa.num_components = num_components; + mul_alu->dest.ssa.live_in = INT_MAX; + mul_alu->dest.ssa.live_out = 0; + mul_alu->dest.write_mask = u_bit_consecutive(0, num_components); + + alu->src[0].type = ppir_target_ssa; + alu->src[0].ssa = &mul_alu->dest.ssa; + for (int i = 0; i < 4; i++) + alu->src->swizzle[i] = i; + + ppir_node_foreach_pred_safe(node, dep) { + ppir_node *pred = dep->pred; + ppir_node_remove_dep(dep); + ppir_node_add_dep(mul_node, pred); + } + ppir_node_add_dep(node, mul_node); + ppir_node_add_dep(mul_node, inv_2pi_node); + + return ppir_lower_vec_to_scalar(block, node); +} + +/* insert a move as the select condition to make sure it can + * be inserted to select instr float mul slot + */ +static bool ppir_lower_select(ppir_block *block, ppir_node *node) +{ + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0); + if (!move) + return false; + list_addtail(&move->list, &node->list); + + ppir_alu_node *move_alu = ppir_node_to_alu(move); + ppir_src *move_src = move_alu->src, *src = alu->src; + move_src->type = src->type; + move_src->ssa = src->ssa; + move_src->swizzle[0] = 
src->swizzle[0]; + move_alu->num_src = 1; + + ppir_dest *move_dest = &move_alu->dest; + move_dest->type = ppir_target_ssa; + move_dest->ssa.num_components = 1; + move_dest->ssa.live_in = INT_MAX; + move_dest->ssa.live_out = 0; + move_dest->write_mask = 1; + + ppir_node_foreach_pred(node, dep) { + ppir_node *pred = dep->pred; + ppir_dest *dest = ppir_node_get_dest(pred); + if (ppir_node_target_equal(alu->src, dest)) { + ppir_node_replace_pred(dep, move); + ppir_node_add_dep(move, pred); + } + } + + /* move must be the first pred of select node which make sure + * the float mul slot is free when node to instr + */ + assert(ppir_node_first_pred(node) == move); + + src->swizzle[0] = 0; + ppir_node_target_assign(alu->src, move_dest); + return true; +} + +static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = { + [ppir_op_const] = ppir_lower_const, + [ppir_op_dot2] = ppir_lower_dot, + [ppir_op_dot3] = ppir_lower_dot, + [ppir_op_dot4] = ppir_lower_dot, + [ppir_op_rcp] = ppir_lower_vec_to_scalar, + [ppir_op_rsqrt] = ppir_lower_vec_to_scalar, + [ppir_op_log2] = ppir_lower_vec_to_scalar, + [ppir_op_exp2] = ppir_lower_vec_to_scalar, + [ppir_op_sqrt] = ppir_lower_vec_to_scalar, + [ppir_op_sin] = ppir_lower_sin_cos_vec_to_scalar, + [ppir_op_cos] = ppir_lower_sin_cos_vec_to_scalar, + [ppir_op_lt] = ppir_lower_swap_args, + [ppir_op_le] = ppir_lower_swap_args, + [ppir_op_load_texture] = ppir_lower_texture, + [ppir_op_select] = ppir_lower_select, +}; + +bool ppir_lower_prog(ppir_compiler *comp) +{ + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry_safe(ppir_node, node, &block->node_list, list) { + if (ppir_lower_funcs[node->op] && + !ppir_lower_funcs[node->op](block, node)) + return false; + } + } + + ppir_node_print_prog(comp); + return true; +} diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c new file mode 100644 index 00000000000..0a5fe13e312 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/nir.c @@ -0,0 +1,494 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
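In the NIR translation that follows, ppir_compiler_create() allocates a single var_nodes[] array holding the producer node for every SSA def and every register component: SSA defs are indexed directly, and each NIR register gets four consecutive slots (one per component) starting at reg_base, which is set to the SSA count. A restatement of the indexing used by ppir_node_add_src(), for illustration only (the helper name is invented):

#include <stdbool.h>

static inline unsigned var_node_index_example(unsigned num_ssa, bool is_ssa,
                                              unsigned index,
                                              unsigned component)
{
   if (is_ssa)
      return index;                   /* SSA defs occupy [0, num_ssa) */

   /* then four slots per NIR register, one per (swizzled) component */
   return num_ssa + (index << 2) + component;
}
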
+ * + */ + +#include + +#include "util/ralloc.h" +#include "util/bitscan.h" +#include "compiler/nir/nir.h" + +#include "ppir.h" + +static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa) +{ + ppir_node *node = ppir_node_create(block, op, ssa->index, 0); + if (!node) + return NULL; + + ppir_dest *dest = ppir_node_get_dest(node); + dest->type = ppir_target_ssa; + dest->ssa.num_components = ssa->num_components; + dest->ssa.live_in = INT_MAX; + dest->ssa.live_out = 0; + dest->write_mask = u_bit_consecutive(0, ssa->num_components); + + if (node->type == ppir_node_type_load || + node->type == ppir_node_type_store) + dest->ssa.is_head = true; + + return node; +} + +static void *ppir_node_create_reg(ppir_block *block, ppir_op op, + nir_reg_dest *reg, unsigned mask) +{ + ppir_node *node = ppir_node_create(block, op, reg->reg->index, mask); + if (!node) + return NULL; + + ppir_dest *dest = ppir_node_get_dest(node); + + list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) { + if (r->index == reg->reg->index) { + dest->reg = r; + break; + } + } + + dest->type = ppir_target_register; + dest->write_mask = mask; + + if (node->type == ppir_node_type_load || + node->type == ppir_node_type_store) + dest->reg->is_head = true; + + return node; +} + +static void *ppir_node_create_dest(ppir_block *block, ppir_op op, + nir_dest *dest, unsigned mask) +{ + unsigned index = -1; + + if (dest) { + if (dest->is_ssa) + return ppir_node_create_ssa(block, op, &dest->ssa); + else + return ppir_node_create_reg(block, op, &dest->reg, mask); + } + + return ppir_node_create(block, op, index, 0); +} + +static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node, + ppir_src *ps, nir_src *ns, unsigned mask) +{ + ppir_node *child = NULL; + + if (ns->is_ssa) { + child = comp->var_nodes[ns->ssa->index]; + ppir_node_add_dep(node, child); + } + else { + nir_register *reg = ns->reg.reg; + while (mask) { + int swizzle = ps->swizzle[u_bit_scan(&mask)]; + child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle]; + ppir_node_add_dep(node, child); + } + } + + ppir_dest *dest = ppir_node_get_dest(child); + ppir_node_target_assign(ps, dest); +} + +static int nir_to_ppir_opcodes[nir_num_opcodes] = { + /* not supported */ + [0 ... 
nir_last_opcode] = -1, + + [nir_op_fmov] = ppir_op_mov, + [nir_op_imov] = ppir_op_mov, + [nir_op_fmul] = ppir_op_mul, + [nir_op_fadd] = ppir_op_add, + [nir_op_fdot2] = ppir_op_dot2, + [nir_op_fdot3] = ppir_op_dot3, + [nir_op_fdot4] = ppir_op_dot4, + [nir_op_frsq] = ppir_op_rsqrt, + [nir_op_flog2] = ppir_op_log2, + [nir_op_fexp2] = ppir_op_exp2, + [nir_op_fsqrt] = ppir_op_sqrt, + [nir_op_fsin] = ppir_op_sin, + [nir_op_fcos] = ppir_op_cos, + [nir_op_fmax] = ppir_op_max, + [nir_op_fmin] = ppir_op_min, + [nir_op_frcp] = ppir_op_rcp, + [nir_op_ffloor] = ppir_op_floor, + [nir_op_ffract] = ppir_op_fract, + [nir_op_fand] = ppir_op_and, + [nir_op_for] = ppir_op_or, + [nir_op_fxor] = ppir_op_xor, + [nir_op_sge] = ppir_op_ge, + [nir_op_fge] = ppir_op_ge, + [nir_op_slt] = ppir_op_lt, + [nir_op_flt] = ppir_op_lt, + [nir_op_seq] = ppir_op_eq, + [nir_op_feq] = ppir_op_eq, + [nir_op_sne] = ppir_op_ne, + [nir_op_fne] = ppir_op_ne, + [nir_op_fnot] = ppir_op_not, + [nir_op_bcsel] = ppir_op_select, + [nir_op_inot] = ppir_op_not, + [nir_op_b2f32] = ppir_op_mov, +}; + +static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni) +{ + nir_alu_instr *instr = nir_instr_as_alu(ni); + int op = nir_to_ppir_opcodes[instr->op]; + + if (op < 0) { + ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name); + return NULL; + } + + ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest, + instr->dest.write_mask); + if (!node) + return NULL; + + ppir_dest *pd = &node->dest; + nir_alu_dest *nd = &instr->dest; + if (nd->saturate) + pd->modifier = ppir_outmod_clamp_fraction; + + unsigned src_mask; + switch (op) { + case ppir_op_dot2: + src_mask = 0b0011; + break; + case ppir_op_dot3: + src_mask = 0b0111; + break; + case ppir_op_dot4: + src_mask = 0b1111; + break; + default: + src_mask = pd->write_mask; + break; + } + + unsigned num_child = nir_op_infos[instr->op].num_inputs; + node->num_src = num_child; + + for (int i = 0; i < num_child; i++) { + nir_alu_src *ns = instr->src + i; + ppir_src *ps = node->src + i; + memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle)); + ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask); + + ps->absolute = ns->abs; + ps->negate = ns->negate; + } + + return &node->node; +} + +static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) +{ + nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni); + unsigned mask = 0; + ppir_load_node *lnode; + ppir_store_node *snode; + nir_const_value *const_offset; + + switch (instr->intrinsic) { + case nir_intrinsic_load_input: + if (!instr->dest.is_ssa) + mask = u_bit_consecutive(0, instr->num_components); + + lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask); + if (!lnode) + return NULL; + + lnode->num_components = instr->num_components; + lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr); + return &lnode->node; + + case nir_intrinsic_load_uniform: + if (!instr->dest.is_ssa) + mask = u_bit_consecutive(0, instr->num_components); + + lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask); + if (!lnode) + return NULL; + + lnode->num_components = instr->num_components; + lnode->index = nir_intrinsic_base(instr); + + const_offset = nir_src_as_const_value(instr->src[0]); + assert(const_offset); + lnode->index += (uint32_t)const_offset->f32[0]; + + return &lnode->node; + + case nir_intrinsic_store_output: + snode = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0); + if (!snode) + return NULL; + + snode->index = 
nir_intrinsic_base(instr); + + for (int i = 0; i < instr->num_components; i++) + snode->src.swizzle[i] = i; + + ppir_node_add_src(block->comp, &snode->node, &snode->src, instr->src, + u_bit_consecutive(0, instr->num_components)); + + return &snode->node; + + default: + ppir_error("unsupported nir_intrinsic_instr %d\n", instr->intrinsic); + return NULL; + } +} + +static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni) +{ + nir_load_const_instr *instr = nir_instr_as_load_const(ni); + ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def); + if (!node) + return NULL; + + assert(instr->def.bit_size == 32); + + for (int i = 0; i < instr->def.num_components; i++) + node->constant.value[i].i = instr->value.i32[i]; + node->constant.num = instr->def.num_components; + + return &node->node; +} + +static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni) +{ + ppir_error("nir_ssa_undef_instr not support\n"); + return NULL; +} + +static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni) +{ + nir_tex_instr *instr = nir_instr_as_tex(ni); + ppir_load_texture_node *node; + + if (instr->op != nir_texop_tex) { + ppir_error("unsupported texop %d\n", instr->op); + return NULL; + } + + node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, 0); + if (!node) + return NULL; + + node->sampler = instr->texture_index; + + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_EXTERNAL: + break; + default: + ppir_debug("unsupported sampler dim: %d\n", instr->sampler_dim); + return NULL; + } + + node->sampler_dim = instr->sampler_dim; + + for (int i = 0; i < instr->coord_components; i++) + node->src_coords.swizzle[i] = i; + + assert(instr->num_srcs == 1); + for (int i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_coord: + ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src, + u_bit_consecutive(0, instr->coord_components)); + break; + default: + ppir_debug("unknown texture source"); + return NULL; + } + } + + return &node->node; +} + +static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni) +{ + ppir_error("nir_jump_instr not support\n"); + return NULL; +} + +static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = { + [nir_instr_type_alu] = ppir_emit_alu, + [nir_instr_type_intrinsic] = ppir_emit_intrinsic, + [nir_instr_type_load_const] = ppir_emit_load_const, + [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef, + [nir_instr_type_tex] = ppir_emit_tex, + [nir_instr_type_jump] = ppir_emit_jump, +}; + +static ppir_block *ppir_block_create(ppir_compiler *comp) +{ + ppir_block *block = rzalloc(comp, ppir_block); + if (!block) + return NULL; + + list_inithead(&block->node_list); + list_inithead(&block->instr_list); + + return block; +} + +static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock) +{ + ppir_block *block = ppir_block_create(comp); + if (!block) + return false; + + list_addtail(&block->list, &comp->block_list); + block->comp = comp; + + nir_foreach_instr(instr, nblock) { + assert(instr->type < nir_instr_type_phi); + ppir_node *node = ppir_emit_instr[instr->type](block, instr); + if (node) + list_addtail(&node->list, &block->node_list); + } + + return true; +} + +static bool ppir_emit_if(ppir_compiler *comp, nir_if *nif) +{ + ppir_error("if nir_cf_node not support\n"); + return false; +} + +static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop) +{ + 
ppir_error("loop nir_cf_node not support\n"); + return false; +} + +static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc) +{ + ppir_error("function nir_cf_node not support\n"); + return false; +} + +static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, node, node, list) { + bool ret; + + switch (node->type) { + case nir_cf_node_block: + ret = ppir_emit_block(comp, nir_cf_node_as_block(node)); + break; + case nir_cf_node_if: + ret = ppir_emit_if(comp, nir_cf_node_as_if(node)); + break; + case nir_cf_node_loop: + ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node)); + break; + case nir_cf_node_function: + ret = ppir_emit_function(comp, nir_cf_node_as_function(node)); + break; + default: + ppir_error("unknown NIR node type %d\n", node->type); + return false; + } + + if (!ret) + return false; + } + + return true; +} + +static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa) +{ + ppir_compiler *comp = rzalloc_size( + prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *)); + if (!comp) + return NULL; + + list_inithead(&comp->block_list); + list_inithead(&comp->reg_list); + + comp->var_nodes = (ppir_node **)(comp + 1); + comp->reg_base = num_ssa; + comp->prog = prog; + return comp; +} + +bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir, + struct ra_regs *ra) +{ + nir_function_impl *func = nir_shader_get_entrypoint(nir); + ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc); + if (!comp) + return false; + + comp->ra = ra; + + foreach_list_typed(nir_register, reg, node, &func->registers) { + ppir_reg *r = rzalloc(comp, ppir_reg); + if (!r) + return false; + + r->index = reg->index; + r->num_components = reg->num_components; + r->live_in = INT_MAX; + r->live_out = 0; + r->is_head = false; + list_addtail(&r->list, &comp->reg_list); + } + + if (!ppir_emit_cf_list(comp, &func->body)) + goto err_out0; + ppir_node_print_prog(comp); + + if (!ppir_lower_prog(comp)) + goto err_out0; + + if (!ppir_node_to_instr(comp)) + goto err_out0; + + if (!ppir_schedule_prog(comp)) + goto err_out0; + + if (!ppir_regalloc_prog(comp)) + goto err_out0; + + if (!ppir_codegen_prog(comp)) + goto err_out0; + + ralloc_free(comp); + return true; + +err_out0: + ralloc_free(comp); + return false; +} + diff --git a/src/gallium/drivers/lima/ir/pp/node.c b/src/gallium/drivers/lima/ir/pp/node.c new file mode 100644 index 00000000000..9c871abb4c9 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/node.c @@ -0,0 +1,426 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "util/u_math.h" +#include "util/ralloc.h" +#include "util/bitscan.h" + +#include "ppir.h" + +const ppir_op_info ppir_op_infos[] = { + [ppir_op_mov] = { + .name = "mov", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL, + PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_mul] = { + .name = "mul", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_add] = { + .name = "add", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_dot2] = { + .name = "dot2", + }, + [ppir_op_dot3] = { + .name = "dot3", + }, + [ppir_op_dot4] = { + .name = "dot4", + }, + [ppir_op_sum3] = { + .name = "sum3", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_sum4] = { + .name = "sum4", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_rsqrt] = { + .name = "rsqrt", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_log2] = { + .name = "log2", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_exp2] = { + .name = "exp2", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_sqrt] = { + .name = "sqrt", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_sin] = { + .name = "sin", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_cos] = { + .name = "cos", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_max] = { + .name = "max", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL, + PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_min] = { + .name = "min", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL, + PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_floor] = { + .name = "floor", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_fract] = { + .name = "fract", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_and] = { + .name = "and", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_or] = { + .name = "or", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_xor] = { + .name = "xor", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_not] = { + .name = "not", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_lt] = { + .name = "lt", + }, + [ppir_op_le] = { + .name = "le", + }, + [ppir_op_gt] = { + .name = "gt", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, 
PPIR_INSTR_SLOT_ALU_SCL_ADD, + PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_ge] = { + .name = "ge", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD, + PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_eq] = { + .name = "eq", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD, + PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_ne] = { + .name = "ne", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD, + PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_select] = { + .name = "select", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_rcp] = { + .name = "rcp", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_load_varying] = { + .name = "ld_var", + .type = ppir_node_type_load, + .slots = (int []) { + PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_load_coords] = { + .name = "ld_coords", + .type = ppir_node_type_load, + .slots = (int []) { + PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_load_uniform] = { + .name = "ld_uni", + .type = ppir_node_type_load, + .slots = (int []) { + PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_load_texture] = { + .name = "ld_tex", + .type = ppir_node_type_load_texture, + .slots = (int []) { + PPIR_INSTR_SLOT_TEXLD, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_load_temp] = { + .name = "ld_temp", + .type = ppir_node_type_load, + .slots = (int []) { + PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_const] = { + .name = "const", + .type = ppir_node_type_const, + }, + [ppir_op_store_color] = { + .name = "st_col", + .type = ppir_node_type_store, + }, + [ppir_op_store_temp] = { + .name = "st_temp", + .type = ppir_node_type_store, + .slots = (int []) { + PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END + }, + }, +}; + +void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask) +{ + ppir_compiler *comp = block->comp; + static const int node_size[] = { + [ppir_node_type_alu] = sizeof(ppir_alu_node), + [ppir_node_type_const] = sizeof(ppir_const_node), + [ppir_node_type_load] = sizeof(ppir_load_node), + [ppir_node_type_store] = sizeof(ppir_store_node), + [ppir_node_type_load_texture] = sizeof(ppir_load_texture_node), + }; + + ppir_node_type type = ppir_op_infos[op].type; + int size = node_size[type]; + ppir_node *node = rzalloc_size(block, size); + if (!node) + return NULL; + + list_inithead(&node->succ_list); + list_inithead(&node->pred_list); + + if (index >= 0) { + if (mask) { + /* reg has 4 slots for each componemt write node */ + while (mask) + comp->var_nodes[(index << 2) + comp->reg_base + u_bit_scan(&mask)] = node; + snprintf(node->name, sizeof(node->name), "reg%d", index); + } else { + comp->var_nodes[index] = node; + snprintf(node->name, sizeof(node->name), "ssa%d", index); + } + } + else + snprintf(node->name, sizeof(node->name), "new"); + + node->op = op; + node->type = type; + node->index = comp->cur_index++; + node->block = block; + + return node; +} + +void ppir_node_add_dep(ppir_node *succ, ppir_node *pred) +{ + /* don't add dep for two nodes from different block */ + if (succ->block != pred->block) + return; + + /* don't add duplicated dep */ + 
ppir_node_foreach_pred(succ, dep) { + if (dep->pred == pred) + return; + } + + ppir_dep *dep = ralloc(succ, ppir_dep); + dep->pred = pred; + dep->succ = succ; + list_addtail(&dep->pred_link, &succ->pred_list); + list_addtail(&dep->succ_link, &pred->succ_list); +} + +void ppir_node_remove_dep(ppir_dep *dep) +{ + list_del(&dep->succ_link); + list_del(&dep->pred_link); + ralloc_free(dep); +} + +static void _ppir_node_replace_child(ppir_src *src, ppir_node *old_child, ppir_node *new_child) +{ + ppir_dest *od = ppir_node_get_dest(old_child); + if (ppir_node_target_equal(src, od)) { + ppir_dest *nd = ppir_node_get_dest(new_child); + ppir_node_target_assign(src, nd); + } +} + +void ppir_node_replace_child(ppir_node *parent, ppir_node *old_child, ppir_node *new_child) +{ + if (parent->type == ppir_node_type_alu) { + ppir_alu_node *alu = ppir_node_to_alu(parent); + for (int i = 0; i < alu->num_src; i++) + _ppir_node_replace_child(alu->src + i, old_child, new_child); + } + else if (parent->type == ppir_node_type_store) { + ppir_store_node *store = ppir_node_to_store(parent); + _ppir_node_replace_child(&store->src, old_child, new_child); + } +} + +void ppir_node_replace_pred(ppir_dep *dep, ppir_node *new_pred) +{ + list_del(&dep->succ_link); + dep->pred = new_pred; + list_addtail(&dep->succ_link, &new_pred->succ_list); +} + +void ppir_node_replace_all_succ(ppir_node *dst, ppir_node *src) +{ + ppir_node_foreach_succ_safe(src, dep) { + ppir_node_replace_pred(dep, dst); + ppir_node_replace_child(dep->succ, src, dst); + } +} + +void ppir_node_delete(ppir_node *node) +{ + ppir_node_foreach_succ_safe(node, dep) + ppir_node_remove_dep(dep); + + ppir_node_foreach_pred_safe(node, dep) + ppir_node_remove_dep(dep); + + list_del(&node->list); + ralloc_free(node); +} + +static void ppir_node_print_node(ppir_node *node, int space) +{ + for (int i = 0; i < space; i++) + printf(" "); + printf("%s%s %d %s\n", node->printed && !ppir_node_is_leaf(node) ? 
"+" : "", + ppir_op_infos[node->op].name, node->index, node->name); + + if (!node->printed) { + ppir_node_foreach_pred(node, dep) { + ppir_node *pred = dep->pred; + ppir_node_print_node(pred, space + 2); + } + + node->printed = true; + } +} + +void ppir_node_print_prog(ppir_compiler *comp) +{ + if (!(lima_debug & LIMA_DEBUG_PP)) + return; + + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_node, node, &block->node_list, list) { + node->printed = false; + } + } + + printf("========prog========\n"); + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + printf("-------block------\n"); + list_for_each_entry(ppir_node, node, &block->node_list, list) { + if (ppir_node_is_root(node)) + ppir_node_print_node(node, 0); + } + } + printf("====================\n"); +} diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c new file mode 100644 index 00000000000..26d2c9868f6 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "ppir.h" + + +static bool create_new_instr(ppir_block *block, ppir_node *node) +{ + ppir_instr *instr = ppir_instr_create(block); + if (unlikely(!instr)) + return false; + + if (!ppir_instr_insert_node(instr, node)) + return false; + + return true; +} + +static bool insert_to_load_tex(ppir_block *block, ppir_node *load_coords, ppir_node *ldtex) +{ + ppir_dest *dest = ppir_node_get_dest(ldtex); + ppir_node *move = NULL; + + ppir_load_node *load = ppir_node_to_load(load_coords); + load->dest.type = ppir_target_pipeline; + load->dest.pipeline = ppir_pipeline_reg_discard; + + ppir_load_texture_node *load_texture = ppir_node_to_load_texture(ldtex); + load_texture->src_coords.type = ppir_target_pipeline; + load_texture->src_coords.pipeline = ppir_pipeline_reg_discard; + + /* Insert load_coords to ldtex instruction */ + if (!ppir_instr_insert_node(ldtex->instr, load_coords)) + return false; + + /* Create move node */ + move = ppir_node_create(block, ppir_op_mov, -1 , 0); + if (unlikely(!move)) + return false; + + ppir_debug("insert_load_tex: create move %d for %d\n", + move->index, ldtex->index); + + ppir_alu_node *alu = ppir_node_to_alu(move); + alu->dest = *dest; + + ppir_node_replace_all_succ(move, ldtex); + + dest->type = ppir_target_pipeline; + dest->pipeline = ppir_pipeline_reg_sampler; + + alu->num_src = 1; + ppir_node_target_assign(&alu->src[0], dest); + for (int i = 0; i < 4; i++) + alu->src->swizzle[i] = i; + + ppir_node_add_dep(move, ldtex); + list_addtail(&move->list, &ldtex->list); + + if (!ppir_instr_insert_node(ldtex->instr, move)) + return false; + + return true; +} + +static bool insert_to_each_succ_instr(ppir_block *block, ppir_node *node) +{ + ppir_dest *dest = ppir_node_get_dest(node); + assert(dest->type == ppir_target_ssa); + + ppir_node *move = NULL; + + ppir_node_foreach_succ_safe(node, dep) { + ppir_node *succ = dep->succ; + assert(succ->type == ppir_node_type_alu); + + if (!ppir_instr_insert_node(succ->instr, node)) { + /* create a move node to insert for failed node */ + if (!move) { + move = ppir_node_create(block, ppir_op_mov, -1, 0); + if (unlikely(!move)) + return false; + + ppir_debug("node_to_instr create move %d for %d\n", + move->index, node->index); + + ppir_alu_node *alu = ppir_node_to_alu(move); + alu->dest = *dest; + alu->num_src = 1; + ppir_node_target_assign(alu->src, dest); + for (int i = 0; i < 4; i++) + alu->src->swizzle[i] = i; + } + + ppir_node_replace_pred(dep, move); + ppir_node_replace_child(succ, node, move); + } + } + + if (move) { + if (!create_new_instr(block, move)) + return false; + + MAYBE_UNUSED bool insert_result = + ppir_instr_insert_node(move->instr, node); + assert(insert_result); + + ppir_node_add_dep(move, node); + list_addtail(&move->list, &node->list); + } + + /* dupliacte node for each successor */ + + bool first = true; + struct list_head dup_list; + list_inithead(&dup_list); + + ppir_node_foreach_succ_safe(node, dep) { + ppir_node *succ = dep->succ; + + if (first) { + first = false; + node->instr = succ->instr; + continue; + } + + if (succ->instr == node->instr) + continue; + + list_for_each_entry(ppir_node, dup, &dup_list, list) { + if (succ->instr == dup->instr) { + ppir_node_replace_pred(dep, dup); + continue; + } + } + + ppir_node *dup = ppir_node_create(block, node->op, -1, 0); + if (unlikely(!dup)) + return false; + list_addtail(&dup->list, &dup_list); + + ppir_debug("node_to_instr duplicate %s %d from %d\n", + ppir_op_infos[dup->op].name, dup->index, node->index); + + ppir_instr *instr = succ->instr; + 
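/* Each extra successor that landed in a different instruction gets its own
 * copy of the node: the copy is attached to that successor's instruction
 * (and, for uniform/temp loads, takes over the instruction slot below), so
 * every consumer can feed the value through a pipeline register instead of
 * keeping it live in an allocated register across instructions. */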
dup->instr = instr; + dup->instr_pos = node->instr_pos; + ppir_node_replace_pred(dep, dup); + + if ((node->op == ppir_op_load_uniform) || (node->op == ppir_op_load_temp)) { + ppir_load_node *load = ppir_node_to_load(node); + ppir_load_node *dup_load = ppir_node_to_load(dup); + dup_load->dest = load->dest; + dup_load->index = load->index; + dup_load->num_components = load->num_components; + instr->slots[node->instr_pos] = dup; + } + } + + list_splicetail(&dup_list, &node->list); + + return true; +} + +static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node) +{ + switch (node->type) { + case ppir_node_type_alu: + { + /* merge pred mul and succ add in the same instr can save a reg + * by using pipeline reg ^vmul/^fmul */ + ppir_alu_node *alu = ppir_node_to_alu(node); + if (alu->dest.type == ppir_target_ssa && + ppir_node_has_single_succ(node)) { + ppir_node *succ = ppir_node_first_succ(node); + if (succ->instr_pos == PPIR_INSTR_SLOT_ALU_VEC_ADD) { + node->instr_pos = PPIR_INSTR_SLOT_ALU_VEC_MUL; + /* select instr's condition must be inserted to fmul slot */ + if (succ->op == ppir_op_select && + ppir_node_first_pred(succ) == node) { + assert(alu->dest.ssa.num_components == 1); + node->instr_pos = PPIR_INSTR_SLOT_ALU_SCL_MUL; + } + ppir_instr_insert_mul_node(succ, node); + } + else if (succ->instr_pos == PPIR_INSTR_SLOT_ALU_SCL_ADD && + alu->dest.ssa.num_components == 1) { + node->instr_pos = PPIR_INSTR_SLOT_ALU_SCL_MUL; + ppir_instr_insert_mul_node(succ, node); + } + } + + /* can't inserted to any existing instr, create one */ + if (!node->instr && !create_new_instr(block, node)) + return false; + + break; + } + case ppir_node_type_load: + if ((node->op == ppir_op_load_uniform) || (node->op == ppir_op_load_temp)) { + /* merge pred load_uniform into succ instr can save a reg + * by using pipeline reg */ + if (!insert_to_each_succ_instr(block, node)) + return false; + + ppir_load_node *load = ppir_node_to_load(node); + load->dest.type = ppir_target_pipeline; + load->dest.pipeline = ppir_pipeline_reg_uniform; + } + else if (node->op == ppir_op_load_temp) { + /* merge pred load_temp into succ instr can save a reg + * by using pipeline reg */ + if (!insert_to_each_succ_instr(block, node)) + return false; + + ppir_load_node *load = ppir_node_to_load(node); + load->dest.type = ppir_target_pipeline; + load->dest.pipeline = ppir_pipeline_reg_uniform; + } + else if (node->op == ppir_op_load_varying) { + /* delay the load varying dup to scheduler */ + if (!create_new_instr(block, node)) + return false; + } + else if (node->op == ppir_op_load_coords) { + ppir_node *ldtex = ppir_node_first_succ(node); + if (!insert_to_load_tex(block, node, ldtex)) + return false; + } + else { + /* not supported yet */ + assert(0); + return false; + } + break; + case ppir_node_type_load_texture: + if (!create_new_instr(block, node)) + return false; + break; + case ppir_node_type_const: + if (!insert_to_each_succ_instr(block, node)) + return false; + break; + case ppir_node_type_store: + { + if (node->op == ppir_op_store_temp) { + if (!create_new_instr(block, node)) + return false; + break; + } + + /* Only the store color node should appear here. + * Currently we always insert a move node as the end instr. + * But it should only be done when: + * 1. store a const node + * 2. store a load node + * 3. 
store a reg assigned in another block like loop/if + */ + + assert(node->op == ppir_op_store_color); + + ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0); + if (unlikely(!move)) + return false; + + ppir_debug("node_to_instr create move %d from store %d\n", + move->index, node->index); + + ppir_node_foreach_pred_safe(node, dep) { + ppir_node *pred = dep->pred; + /* we can't do this in this function except here as this + * store is the root of this recursion */ + ppir_node_remove_dep(dep); + ppir_node_add_dep(move, pred); + } + + ppir_node_add_dep(node, move); + list_addtail(&move->list, &node->list); + + ppir_alu_node *alu = ppir_node_to_alu(move); + ppir_store_node *store = ppir_node_to_store(node); + alu->src[0] = store->src; + alu->num_src = 1; + + alu->dest.type = ppir_target_ssa; + alu->dest.ssa.num_components = 4; + alu->dest.ssa.live_in = INT_MAX; + alu->dest.ssa.live_out = 0; + alu->dest.write_mask = 0xf; + + store->src.type = ppir_target_ssa; + store->src.ssa = &alu->dest.ssa; + + if (!create_new_instr(block, move)) + return false; + + move->instr->is_end = true; + node->instr = move->instr; + + /* use move for the following recursion */ + node = move; + break; + } + default: + return false; + } + + /* we have to make sure the dep not be destroyed (due to + * succ change) in ppir_do_node_to_instr, otherwise we can't + * do recursion like this */ + ppir_node_foreach_pred(node, dep) { + ppir_node *pred = dep->pred; + bool ready = true; + + /* pred may already be processed by the previous pred + * (this pred may be both node and previous pred's child) */ + if (pred->instr) + continue; + + /* insert pred only when all its successors have been inserted */ + ppir_node_foreach_succ(pred, dep) { + ppir_node *succ = dep->succ; + if (!succ->instr) { + ready = false; + break; + } + } + + if (ready) { + if (!ppir_do_node_to_instr(block, pred)) + return false; + } + } + + return true; +} + +static bool ppir_create_instr_from_node(ppir_compiler *comp) +{ + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_node, node, &block->node_list, list) { + if (ppir_node_is_root(node)) { + if (!ppir_do_node_to_instr(block, node)) + return false; + } + } + } + + return true; +} + +static void ppir_build_instr_dependency(ppir_compiler *comp) +{ + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { + ppir_node *node = instr->slots[i]; + if (node) { + ppir_node_foreach_pred(node, dep) { + ppir_node *pred = dep->pred; + if (pred->instr && pred->instr != instr) + ppir_instr_add_dep(instr, pred->instr); + } + } + } + } + } +} + +bool ppir_node_to_instr(ppir_compiler *comp) +{ + if (!ppir_create_instr_from_node(comp)) + return false; + ppir_instr_print_list(comp); + + ppir_build_instr_dependency(comp); + ppir_instr_print_dep(comp); + + return true; +} diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h new file mode 100644 index 00000000000..feb34895114 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/ppir.h @@ -0,0 +1,512 @@ +/* + * Copyright (c) 2017 Lima Project + * Copyright (c) 2013 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ + +#ifndef LIMA_IR_PP_PPIR_H +#define LIMA_IR_PP_PPIR_H + +#include "util/u_math.h" +#include "util/list.h" + +#include "ir/lima_ir.h" + +typedef enum { + ppir_op_mov, + ppir_op_add, + + ppir_op_ddx, + ppir_op_ddy, + + ppir_op_mul, + ppir_op_rcp, + + ppir_op_sin_lut, + ppir_op_cos_lut, + + ppir_op_sum3, + ppir_op_sum4, + + ppir_op_normalize2, + ppir_op_normalize3, + ppir_op_normalize4, + + ppir_op_select, + + ppir_op_sin, + ppir_op_cos, + ppir_op_tan, + ppir_op_asin, + ppir_op_acos, + + ppir_op_atan, + ppir_op_atan2, + ppir_op_atan_pt1, + ppir_op_atan2_pt1, + ppir_op_atan_pt2, + + ppir_op_exp, + ppir_op_log, + ppir_op_exp2, + ppir_op_log2, + ppir_op_sqrt, + ppir_op_rsqrt, + + ppir_op_sign, + ppir_op_floor, + ppir_op_ceil, + ppir_op_fract, + ppir_op_mod, + ppir_op_min, + ppir_op_max, + + ppir_op_dot2, + ppir_op_dot3, + ppir_op_dot4, + + ppir_op_and, + ppir_op_or, + ppir_op_xor, + + ppir_op_lt, + ppir_op_gt, + ppir_op_le, + ppir_op_ge, + ppir_op_eq, + ppir_op_ne, + ppir_op_not, + + ppir_op_load_uniform, + ppir_op_load_varying, + ppir_op_load_coords, + ppir_op_load_texture, + ppir_op_load_temp, + + ppir_op_store_temp, + ppir_op_store_color, + + ppir_op_const, + + ppir_op_num, +} ppir_op; + +typedef enum { + ppir_node_type_alu, + ppir_node_type_const, + ppir_node_type_load, + ppir_node_type_store, + ppir_node_type_load_texture, +} ppir_node_type; + +typedef struct { + char *name; + ppir_node_type type; + int *slots; +} ppir_op_info; + +extern const ppir_op_info ppir_op_infos[]; + +typedef struct { + void *pred, *succ; + struct list_head pred_link; + struct list_head succ_link; +} ppir_dep; + +typedef struct ppir_node { + struct list_head list; + ppir_op op; + ppir_node_type type; + int index; + char name[16]; + bool printed; + struct ppir_instr *instr; + int instr_pos; + struct ppir_block *block; + + /* for scheduler */ + struct list_head succ_list; + struct list_head pred_list; +} ppir_node; + +typedef enum { + ppir_pipeline_reg_const0, + ppir_pipeline_reg_const1, + ppir_pipeline_reg_sampler, + ppir_pipeline_reg_uniform, + ppir_pipeline_reg_vmul, + ppir_pipeline_reg_fmul, + ppir_pipeline_reg_discard, /* varying load */ +} ppir_pipeline; + +typedef struct ppir_reg { + struct list_head list; + int index; + int num_components; + /* whether this reg has to start from the x component + * of a full physical reg, this is true for reg used + * in load/store instr which has no swizzle field + */ + bool is_head; + /* instr live range */ + int live_in, live_out; + bool spilled; +} ppir_reg; + +typedef enum { + ppir_target_ssa, + ppir_target_pipeline, + ppir_target_register, +} ppir_target; + +typedef struct ppir_src { + ppir_target type; + + union { + ppir_reg *ssa; + ppir_reg *reg; + ppir_pipeline 
pipeline; + }; + + uint8_t swizzle[4]; + bool absolute, negate; +} ppir_src; + +typedef enum { + ppir_outmod_none, + ppir_outmod_clamp_fraction, + ppir_outmod_clamp_positive, + ppir_outmod_round, +} ppir_outmod; + +typedef struct ppir_dest { + ppir_target type; + + union { + ppir_reg ssa; + ppir_reg *reg; + ppir_pipeline pipeline; + }; + + ppir_outmod modifier; + unsigned write_mask : 4; +} ppir_dest; + +typedef struct { + ppir_node node; + ppir_dest dest; + ppir_src src[3]; + int num_src; + int shift : 3; /* Only used for ppir_op_mul */ +} ppir_alu_node; + +typedef struct ppir_const { + union fi value[4]; + int num; +} ppir_const; + +typedef struct { + ppir_node node; + ppir_const constant; + ppir_dest dest; +} ppir_const_node; + +typedef struct { + ppir_node node; + int index; + int num_components; + ppir_dest dest; + ppir_src src; +} ppir_load_node; + +typedef struct { + ppir_node node; + int index; + int num_components; + ppir_src src; +} ppir_store_node; + +typedef struct { + ppir_node node; + ppir_dest dest; + ppir_src src_coords; + int sampler; + int sampler_dim; +} ppir_load_texture_node; + +enum ppir_instr_slot { + PPIR_INSTR_SLOT_VARYING, + PPIR_INSTR_SLOT_TEXLD, + PPIR_INSTR_SLOT_UNIFORM, + PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_ALU_SCL_MUL, + PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_ALU_SCL_ADD, + PPIR_INSTR_SLOT_ALU_COMBINE, + PPIR_INSTR_SLOT_STORE_TEMP, + PPIR_INSTR_SLOT_NUM, + PPIR_INSTR_SLOT_END, + PPIR_INSTR_SLOT_ALU_START = PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_ALU_END = PPIR_INSTR_SLOT_ALU_COMBINE, +}; + +typedef struct ppir_instr { + struct list_head list; + int index; + bool printed; + int seq; /* command sequence after schedule */ + + ppir_node *slots[PPIR_INSTR_SLOT_NUM]; + ppir_const constant[2]; + bool is_end; + + /* for scheduler */ + struct list_head succ_list; + struct list_head pred_list; + float reg_pressure; + int est; /* earliest start time */ + int parent_index; + bool scheduled; +} ppir_instr; + +typedef struct ppir_block { + struct list_head list; + struct list_head node_list; + struct list_head instr_list; + struct ppir_compiler *comp; + + /* for scheduler */ + int sched_instr_index; + int sched_instr_base; +} ppir_block; + +struct ra_regs; +struct lima_fs_shader_state; + +typedef struct ppir_compiler { + struct list_head block_list; + int cur_index; + int cur_instr_index; + + struct list_head reg_list; + + /* array for searching ssa/reg node */ + ppir_node **var_nodes; + unsigned reg_base; + + struct ra_regs *ra; + struct lima_fs_shader_state *prog; + + /* for scheduler */ + int sched_instr_base; + + /* for regalloc spilling debug */ + int force_spilling; +} ppir_compiler; + +void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask); +void ppir_node_add_dep(ppir_node *succ, ppir_node *pred); +void ppir_node_remove_dep(ppir_dep *dep); +void ppir_node_delete(ppir_node *node); +void ppir_node_print_prog(ppir_compiler *comp); +void ppir_node_replace_child(ppir_node *parent, ppir_node *old_child, ppir_node *new_child); +void ppir_node_replace_all_succ(ppir_node *dst, ppir_node *src); +void ppir_node_replace_pred(ppir_dep *dep, ppir_node *new_pred); + +static inline bool ppir_node_is_root(ppir_node *node) +{ + return list_empty(&node->succ_list); +} + +static inline bool ppir_node_is_leaf(ppir_node *node) +{ + return list_empty(&node->pred_list); +} + +static inline bool ppir_node_has_single_succ(ppir_node *node) +{ + return list_is_singular(&node->succ_list); +} + +static inline ppir_node 
*ppir_node_first_succ(ppir_node *node) +{ + return list_first_entry(&node->succ_list, ppir_dep, succ_link)->succ; +} + +static inline bool ppir_node_has_single_pred(ppir_node *node) +{ + return list_is_singular(&node->pred_list); +} + +static inline ppir_node *ppir_node_first_pred(ppir_node *node) +{ + return list_first_entry(&node->pred_list, ppir_dep, pred_link)->pred; +} + +#define ppir_node_foreach_succ(node, dep) \ + list_for_each_entry(ppir_dep, dep, &node->succ_list, succ_link) +#define ppir_node_foreach_succ_safe(node, dep) \ + list_for_each_entry_safe(ppir_dep, dep, &node->succ_list, succ_link) +#define ppir_node_foreach_pred(node, dep) \ + list_for_each_entry(ppir_dep, dep, &node->pred_list, pred_link) +#define ppir_node_foreach_pred_safe(node, dep) \ + list_for_each_entry_safe(ppir_dep, dep, &node->pred_list, pred_link) + +#define ppir_node_to_alu(node) ((ppir_alu_node *)(node)) +#define ppir_node_to_const(node) ((ppir_const_node *)(node)) +#define ppir_node_to_load(node) ((ppir_load_node *)(node)) +#define ppir_node_to_store(node) ((ppir_store_node *)(node)) +#define ppir_node_to_load_texture(node) ((ppir_load_texture_node *)(node)) + +static inline ppir_dest *ppir_node_get_dest(ppir_node *node) +{ + switch (node->type) { + case ppir_node_type_alu: + return &ppir_node_to_alu(node)->dest; + case ppir_node_type_load: + return &ppir_node_to_load(node)->dest; + case ppir_node_type_const: + return &ppir_node_to_const(node)->dest; + case ppir_node_type_load_texture: + return &ppir_node_to_load_texture(node)->dest; + default: + return NULL; + } +} + +static inline void ppir_node_target_assign(ppir_src *src, ppir_dest *dest) +{ + src->type = dest->type; + switch (src->type) { + case ppir_target_ssa: + src->ssa = &dest->ssa; + break; + case ppir_target_register: + src->reg = dest->reg; + break; + case ppir_target_pipeline: + src->pipeline = dest->pipeline; + break; + } +} + +static inline bool ppir_node_target_equal(ppir_src *src, ppir_dest *dest) +{ + if (src->type != dest->type || + (src->type == ppir_target_ssa && src->ssa != &dest->ssa) || + (src->type == ppir_target_register && src->reg != dest->reg) || + (src->type == ppir_target_pipeline && src->pipeline != dest->pipeline)) + return false; + + return true; +} + +static inline int ppir_target_get_src_reg_index(ppir_src *src) +{ + switch (src->type) { + case ppir_target_ssa: + return src->ssa->index; + case ppir_target_register: + return src->reg->index; + case ppir_target_pipeline: + if (src->pipeline == ppir_pipeline_reg_discard) + return 15 * 4; + return (src->pipeline + 12) * 4; + } + + return -1; +} + +static inline int ppir_target_get_dest_reg_index(ppir_dest *dest) +{ + switch (dest->type) { + case ppir_target_ssa: + return dest->ssa.index; + case ppir_target_register: + return dest->reg->index; + case ppir_target_pipeline: + if (dest->pipeline == ppir_pipeline_reg_discard) + return 15 * 4; + return (dest->pipeline + 12) * 4; + } + + return -1; +} + +static inline bool ppir_target_is_scaler(ppir_dest *dest) +{ + switch (dest->type) { + case ppir_target_ssa: + return dest->ssa.num_components == 1; + case ppir_target_register: + /* only one bit in mask is set */ + if ((dest->write_mask & 0x3) == 0x3 || + (dest->write_mask & 0x5) == 0x5 || + (dest->write_mask & 0x9) == 0x9 || + (dest->write_mask & 0x6) == 0x6 || + (dest->write_mask & 0xa) == 0xa || + (dest->write_mask & 0xc) == 0xc) + return false; + else + return true; + case ppir_target_pipeline: + if (dest->pipeline == ppir_pipeline_reg_fmul) + return true; + else + return 
false; + default: + return false; + } +} + +ppir_instr *ppir_instr_create(ppir_block *block); +bool ppir_instr_insert_node(ppir_instr *instr, ppir_node *node); +void ppir_instr_add_dep(ppir_instr *succ, ppir_instr *pred); +void ppir_instr_print_list(ppir_compiler *comp); +void ppir_instr_print_dep(ppir_compiler *comp); +void ppir_instr_insert_mul_node(ppir_node *add, ppir_node *mul); + +#define ppir_instr_foreach_succ(instr, dep) \ + list_for_each_entry(ppir_dep, dep, &instr->succ_list, succ_link) +#define ppir_instr_foreach_succ_safe(instr, dep) \ + list_for_each_entry_safe(ppir_dep, dep, &instr->succ_list, succ_link) +#define ppir_instr_foreach_pred(instr, dep) \ + list_for_each_entry(ppir_dep, dep, &instr->pred_list, pred_link) +#define ppir_instr_foreach_pred_safe(instr, dep) \ + list_for_each_entry_safe(ppir_dep, dep, &instr->pred_list, pred_link) + +static inline bool ppir_instr_is_root(ppir_instr *instr) +{ + return list_empty(&instr->succ_list); +} + +static inline bool ppir_instr_is_leaf(ppir_instr *instr) +{ + return list_empty(&instr->pred_list); +} + +bool ppir_lower_prog(ppir_compiler *comp); +bool ppir_node_to_instr(ppir_compiler *comp); +bool ppir_schedule_prog(ppir_compiler *comp); +bool ppir_regalloc_prog(ppir_compiler *comp); +bool ppir_codegen_prog(ppir_compiler *comp); + +#endif diff --git a/src/gallium/drivers/lima/ir/pp/regalloc.c b/src/gallium/drivers/lima/ir/pp/regalloc.c new file mode 100644 index 00000000000..6aa71e91cfe --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/regalloc.c @@ -0,0 +1,757 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
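Two conventions from the ppir.h helpers just above are easy to miss: pipeline operands (the ^const/^uniform/^vmul/^fmul registers referred to in the node_to_instr comments) are reported by ppir_target_get_src_reg_index() as fixed indices of (pipeline + 12) * 4, with the discard target pinned to 15 * 4, and the write-mask enumeration in ppir_target_is_scaler() is simply a "more than one bit set" test. A small stand-alone check of the latter equivalence, written in plain C rather than with Mesa's bit helpers:

#include <assert.h>

/* Same result as the register case of ppir_target_is_scaler(): the six
 * pairwise masks checked there cover every two-component combination, so
 * the test passes exactly when fewer than two bits of write_mask are set. */
static int write_mask_is_scalar(unsigned write_mask)
{
   return (write_mask & (write_mask - 1)) == 0;
}

int main(void)
{
   assert(write_mask_is_scalar(0x1));    /* .x    */
   assert(write_mask_is_scalar(0x8));    /* .w    */
   assert(!write_mask_is_scalar(0x3));   /* .xy   */
   assert(!write_mask_is_scalar(0xa));   /* .yw   */
   assert(!write_mask_is_scalar(0xf));   /* .xyzw */
   return 0;
}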
+ * + */ + +#include "util/ralloc.h" +#include "util/register_allocate.h" +#include "util/u_debug.h" + +#include "ppir.h" +#include "lima_context.h" + +#define PPIR_FULL_REG_NUM 6 + +#define PPIR_VEC1_REG_NUM (PPIR_FULL_REG_NUM * 4) /* x, y, z, w */ +#define PPIR_VEC2_REG_NUM (PPIR_FULL_REG_NUM * 3) /* xy, yz, zw */ +#define PPIR_VEC3_REG_NUM (PPIR_FULL_REG_NUM * 2) /* xyz, yzw */ +#define PPIR_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */ +#define PPIR_HEAD_VEC1_REG_NUM PPIR_FULL_REG_NUM /* x */ +#define PPIR_HEAD_VEC2_REG_NUM PPIR_FULL_REG_NUM /* xy */ +#define PPIR_HEAD_VEC3_REG_NUM PPIR_FULL_REG_NUM /* xyz */ +#define PPIR_HEAD_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */ + +#define PPIR_VEC1_REG_BASE 0 +#define PPIR_VEC2_REG_BASE (PPIR_VEC1_REG_BASE + PPIR_VEC1_REG_NUM) +#define PPIR_VEC3_REG_BASE (PPIR_VEC2_REG_BASE + PPIR_VEC2_REG_NUM) +#define PPIR_VEC4_REG_BASE (PPIR_VEC3_REG_BASE + PPIR_VEC3_REG_NUM) +#define PPIR_HEAD_VEC1_REG_BASE (PPIR_VEC4_REG_BASE + PPIR_VEC4_REG_NUM) +#define PPIR_HEAD_VEC2_REG_BASE (PPIR_HEAD_VEC1_REG_BASE + PPIR_HEAD_VEC1_REG_NUM) +#define PPIR_HEAD_VEC3_REG_BASE (PPIR_HEAD_VEC2_REG_BASE + PPIR_HEAD_VEC2_REG_NUM) +#define PPIR_HEAD_VEC4_REG_BASE (PPIR_HEAD_VEC3_REG_BASE + PPIR_HEAD_VEC3_REG_NUM) +#define PPIR_REG_COUNT (PPIR_HEAD_VEC4_REG_BASE + PPIR_HEAD_VEC4_REG_NUM) + +enum ppir_ra_reg_class { + ppir_ra_reg_class_vec1, + ppir_ra_reg_class_vec2, + ppir_ra_reg_class_vec3, + ppir_ra_reg_class_vec4, + + /* 4 reg class for load/store instr regs: + * load/store instr has no swizzle field, so the (virtual) register + * must be allocated at the beginning of a (physical) register, + */ + ppir_ra_reg_class_head_vec1, + ppir_ra_reg_class_head_vec2, + ppir_ra_reg_class_head_vec3, + ppir_ra_reg_class_head_vec4, + + ppir_ra_reg_class_num, +}; + +static const int ppir_ra_reg_base[ppir_ra_reg_class_num + 1] = { + [ppir_ra_reg_class_vec1] = PPIR_VEC1_REG_BASE, + [ppir_ra_reg_class_vec2] = PPIR_VEC2_REG_BASE, + [ppir_ra_reg_class_vec3] = PPIR_VEC3_REG_BASE, + [ppir_ra_reg_class_vec4] = PPIR_VEC4_REG_BASE, + [ppir_ra_reg_class_head_vec1] = PPIR_HEAD_VEC1_REG_BASE, + [ppir_ra_reg_class_head_vec2] = PPIR_HEAD_VEC2_REG_BASE, + [ppir_ra_reg_class_head_vec3] = PPIR_HEAD_VEC3_REG_BASE, + [ppir_ra_reg_class_head_vec4] = PPIR_HEAD_VEC4_REG_BASE, + [ppir_ra_reg_class_num] = PPIR_REG_COUNT, +}; + +static unsigned int * +ppir_ra_reg_q_values[ppir_ra_reg_class_num] = { + (unsigned int []) {1, 2, 3, 4, 1, 2, 3, 4}, + (unsigned int []) {2, 3, 3, 3, 1, 2, 3, 3}, + (unsigned int []) {2, 2, 2, 2, 1, 2, 2, 2}, + (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1}, + (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1}, + (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1}, + (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1}, + (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1}, +}; + +struct ra_regs *ppir_regalloc_init(void *mem_ctx) +{ + struct ra_regs *ret = ra_alloc_reg_set(mem_ctx, PPIR_REG_COUNT, false); + if (!ret) + return NULL; + + /* (x, y, z, w) (xy, yz, zw) (xyz, yzw) (xyzw) (x) (xy) (xyz) (xyzw) */ + static const int class_reg_num[ppir_ra_reg_class_num] = { + 4, 3, 2, 1, 1, 1, 1, 1, + }; + /* base reg (x, y, z, w) confliction with other regs */ + for (int h = 0; h < 4; h++) { + int base_reg_mask = 1 << h; + for (int i = 1; i < ppir_ra_reg_class_num; i++) { + int class_reg_base_mask = (1 << ((i % 4) + 1)) - 1; + for (int j = 0; j < class_reg_num[i]; j++) { + if (base_reg_mask & (class_reg_base_mask << j)) { + for (int k = 0; k < PPIR_FULL_REG_NUM; k++) { + ra_add_reg_conflict(ret, k * 4 + h, + ppir_ra_reg_base[i] + k * 
class_reg_num[i] + j); + } + } + } + } + } + /* build all other confliction by the base reg confliction */ + for (int i = 0; i < PPIR_VEC1_REG_NUM; i++) + ra_make_reg_conflicts_transitive(ret, i); + + for (int i = 0; i < ppir_ra_reg_class_num; i++) + ra_alloc_reg_class(ret); + + int reg_index = 0; + for (int i = 0; i < ppir_ra_reg_class_num; i++) { + while (reg_index < ppir_ra_reg_base[i + 1]) + ra_class_add_reg(ret, i, reg_index++); + } + + ra_set_finalize(ret, ppir_ra_reg_q_values); + return ret; +} + +static ppir_reg *get_src_reg(ppir_src *src) +{ + switch (src->type) { + case ppir_target_ssa: + return src->ssa; + case ppir_target_register: + return src->reg; + default: + return NULL; + } +} + +static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp) +{ + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_node, node, &block->node_list, list) { + if (node->op == ppir_op_store_color) + continue; + + if (!node->instr || node->op == ppir_op_const) + continue; + + ppir_dest *dest = ppir_node_get_dest(node); + if (dest) { + ppir_reg *reg = NULL; + + if (dest->type == ppir_target_ssa) { + reg = &dest->ssa; + list_addtail(®->list, &comp->reg_list); + } + } + } + } +} + +static ppir_reg *ppir_regalloc_build_liveness_info(ppir_compiler *comp) +{ + ppir_reg *ret = NULL; + + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_node, node, &block->node_list, list) { + if (node->op == ppir_op_store_color) { + ppir_store_node *store = ppir_node_to_store(node); + if (store->src.type == ppir_target_ssa) + ret = store->src.ssa; + else + ret = store->src.reg; + ret->live_out = INT_MAX; + continue; + } + + if (!node->instr || node->op == ppir_op_const) + continue; + + /* update reg live_in from node dest (write) */ + ppir_dest *dest = ppir_node_get_dest(node); + if (dest) { + ppir_reg *reg = NULL; + + if (dest->type == ppir_target_ssa) { + reg = &dest->ssa; + } + else if (dest->type == ppir_target_register) + reg = dest->reg; + + if (reg && node->instr->seq < reg->live_in) + reg->live_in = node->instr->seq; + } + + /* update reg live_out from node src (read) */ + switch (node->type) { + case ppir_node_type_alu: + { + ppir_alu_node *alu = ppir_node_to_alu(node); + for (int i = 0; i < alu->num_src; i++) { + ppir_reg *reg = get_src_reg(alu->src + i); + if (reg && node->instr->seq > reg->live_out) + reg->live_out = node->instr->seq; + } + break; + } + case ppir_node_type_store: + { + ppir_store_node *store = ppir_node_to_store(node); + ppir_reg *reg = get_src_reg(&store->src); + if (reg && node->instr->seq > reg->live_out) + reg->live_out = node->instr->seq; + break; + } + case ppir_node_type_load: + { + ppir_load_node *load = ppir_node_to_load(node); + ppir_reg *reg = get_src_reg(&load->src); + if (reg && node->instr->seq > reg->live_out) + reg->live_out = node->instr->seq; + break; + } + case ppir_node_type_load_texture: + { + ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node); + ppir_reg *reg = get_src_reg(&load_tex->src_coords); + if (reg && node->instr->seq > reg->live_out) + reg->live_out = node->instr->seq; + break; + } + default: + break; + } + } + } + + return ret; +} + +static int get_phy_reg_index(int reg) +{ + int i; + + for (i = 0; i < ppir_ra_reg_class_num; i++) { + if (reg < ppir_ra_reg_base[i + 1]) { + reg -= ppir_ra_reg_base[i]; + break; + } + } + + if (i < ppir_ra_reg_class_head_vec1) + return reg / (4 - i) * 4 + reg % (4 - i); + else + return reg * 4; +} + +static void 
ppir_regalloc_print_result(ppir_compiler *comp) +{ + printf("======ppir regalloc result======\n"); + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + printf("%03d:", instr->index); + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { + ppir_node *node = instr->slots[i]; + if (!node) + continue; + + printf(" (%d|", node->index); + + ppir_dest *dest = ppir_node_get_dest(node); + if (dest) + printf("%d", ppir_target_get_dest_reg_index(dest)); + + printf("|"); + + switch (node->type) { + case ppir_node_type_alu: + { + ppir_alu_node *alu = ppir_node_to_alu(node); + for (int j = 0; j < alu->num_src; j++) { + if (j) + printf(" "); + + printf("%d", ppir_target_get_src_reg_index(alu->src + j)); + } + break; + } + case ppir_node_type_store: + { + ppir_store_node *store = ppir_node_to_store(node); + printf("%d", ppir_target_get_src_reg_index(&store->src)); + break; + } + case ppir_node_type_load: + { + ppir_load_node *load = ppir_node_to_load(node); + if (!load->num_components) + printf("%d", ppir_target_get_src_reg_index(&load->src)); + break; + } + case ppir_node_type_load_texture: + { + ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node); + printf("%d", ppir_target_get_src_reg_index(&load_tex->src_coords)); + break; + } + default: + break; + } + + printf(")"); + } + printf("\n"); + } + } + printf("--------------------------\n"); +} + +static bool create_new_instr_after(ppir_block *block, ppir_instr *ref, + ppir_node *node) +{ + ppir_instr *newinstr = ppir_instr_create(block); + if (unlikely(!newinstr)) + return false; + + list_del(&newinstr->list); + list_add(&newinstr->list, &ref->list); + + if (!ppir_instr_insert_node(newinstr, node)) + return false; + + list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) { + instr->seq++; + } + newinstr->seq = ref->seq+1; + newinstr->scheduled = true; + return true; +} + +static bool create_new_instr_before(ppir_block *block, ppir_instr *ref, + ppir_node *node) +{ + ppir_instr *newinstr = ppir_instr_create(block); + if (unlikely(!newinstr)) + return false; + + list_del(&newinstr->list); + list_addtail(&newinstr->list, &ref->list); + + if (!ppir_instr_insert_node(newinstr, node)) + return false; + + list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) { + instr->seq++; + } + newinstr->seq = ref->seq-1; + newinstr->scheduled = true; + return true; +} + +static ppir_alu_node* ppir_update_spilled_src(ppir_compiler *comp, + ppir_block *block, + ppir_node *node, ppir_src *src, + ppir_alu_node *move_alu) +{ + /* alu nodes may have multiple references to the same value. 
+ * try to avoid unnecessary loads for the same alu node by + * saving the node resulting from the temporary load */ + if (move_alu) + goto update_src; + + /* alloc new node to load value */ + ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0); + if (!load_node) + return NULL; + list_addtail(&load_node->list, &node->list); + + ppir_load_node *load = ppir_node_to_load(load_node); + + load->index = -comp->prog->stack_size; /* index sizes are negative */ + load->num_components = src->reg->num_components; + + ppir_dest *ld_dest = &load->dest; + ld_dest->type = ppir_target_pipeline; + ld_dest->pipeline = ppir_pipeline_reg_uniform; + ld_dest->write_mask = 0xf; + + create_new_instr_before(block, node->instr, load_node); + + /* Create move node */ + ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0); + if (unlikely(!move_node)) + return false; + list_addtail(&move_node->list, &node->list); + + move_alu = ppir_node_to_alu(move_node); + + move_alu->num_src = 1; + move_alu->src->type = ppir_target_pipeline; + move_alu->src->pipeline = ppir_pipeline_reg_uniform; + for (int i = 0; i < 4; i++) + move_alu->src->swizzle[i] = i; + + ppir_dest *alu_dest = &move_alu->dest; + alu_dest->type = ppir_target_ssa; + alu_dest->ssa.num_components = 4; + alu_dest->ssa.live_in = INT_MAX; + alu_dest->ssa.live_out = 0; + alu_dest->write_mask = 0xf; + + list_addtail(&alu_dest->ssa.list, &comp->reg_list); + + if (!ppir_instr_insert_node(load_node->instr, move_node)) + return false; + + /* insert the new node as predecessor */ + ppir_node_foreach_pred_safe(node, dep) { + ppir_node *pred = dep->pred; + ppir_node_remove_dep(dep); + ppir_node_add_dep(load_node, pred); + } + ppir_node_add_dep(node, move_node); + ppir_node_add_dep(move_node, load_node); + +update_src: + /* switch node src to use the new ssa instead */ + src->type = ppir_target_ssa; + src->ssa = &move_alu->dest.ssa; + + return move_alu; +} + +static ppir_reg *create_reg(ppir_compiler *comp, int num_components) +{ + ppir_reg *r = rzalloc(comp, ppir_reg); + if (!r) + return NULL; + + r->num_components = num_components; + r->live_in = INT_MAX; + r->live_out = 0; + r->is_head = false; + list_addtail(&r->list, &comp->reg_list); + + return r; +} + +static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block, + ppir_node *node, ppir_dest *dest) +{ + assert(dest != NULL); + ppir_reg *reg = NULL; + if (dest->type == ppir_target_register) { + reg = dest->reg; + reg->num_components = 4; + reg->spilled = true; + } + else { + reg = create_reg(comp, 4); + reg->spilled = true; + list_del(&dest->ssa.list); + } + + /* alloc new node to load value */ + ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0); + if (!load_node) + return NULL; + list_addtail(&load_node->list, &node->list); + + ppir_load_node *load = ppir_node_to_load(load_node); + + load->index = -comp->prog->stack_size; /* index sizes are negative */ + load->num_components = 4; + + load->dest.type = ppir_target_pipeline; + load->dest.pipeline = ppir_pipeline_reg_uniform; + load->dest.write_mask = 0xf; + + create_new_instr_before(block, node->instr, load_node); + + /* Create move node */ + ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0); + if (unlikely(!move_node)) + return false; + list_addtail(&move_node->list, &node->list); + + ppir_alu_node *move_alu = ppir_node_to_alu(move_node); + + move_alu->num_src = 1; + move_alu->src->type = ppir_target_pipeline; + move_alu->src->pipeline = ppir_pipeline_reg_uniform; + for (int i = 
0; i < 4; i++) + move_alu->src->swizzle[i] = i; + + move_alu->dest.type = ppir_target_register; + move_alu->dest.reg = reg; + move_alu->dest.write_mask = 0x0f; + + if (!ppir_instr_insert_node(load_node->instr, move_node)) + return false; + + ppir_node_foreach_pred_safe(node, dep) { + ppir_node *pred = dep->pred; + ppir_node_remove_dep(dep); + ppir_node_add_dep(load_node, pred); + } + ppir_node_add_dep(node, move_node); + ppir_node_add_dep(move_node, load_node); + + dest->type = ppir_target_register; + dest->reg = reg; + + /* alloc new node to store value */ + ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0); + if (!store_node) + return false; + list_addtail(&store_node->list, &node->list); + + ppir_store_node *store = ppir_node_to_store(store_node); + + store->index = -comp->prog->stack_size; /* index sizes are negative */ + store->num_components = 4; + + store->src.type = ppir_target_register; + store->src.reg = dest->reg; + + /* insert the new node as successor */ + ppir_node_foreach_succ_safe(node, dep) { + ppir_node *succ = dep->succ; + ppir_node_remove_dep(dep); + ppir_node_add_dep(succ, store_node); + } + ppir_node_add_dep(store_node, node); + + create_new_instr_after(block, node->instr, store_node); + + return true; +} + +static bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen) +{ + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_node, node, &block->node_list, list) { + + ppir_dest *dest = ppir_node_get_dest(node); + ppir_reg *reg = NULL; + if (dest) { + if (dest->type == ppir_target_ssa) + reg = &dest->ssa; + else if (dest->type == ppir_target_register) + reg = dest->reg; + + if (reg == chosen) + ppir_update_spilled_dest(comp, block, node, dest); + } + + switch (node->type) { + case ppir_node_type_alu: + { + /* alu nodes may have multiple references to the same value. 
+ * try to avoid unnecessary loads for the same alu node by + * saving the node resulting from the temporary load */ + ppir_alu_node *move_alu = NULL; + ppir_alu_node *alu = ppir_node_to_alu(node); + for (int i = 0; i < alu->num_src; i++) { + reg = get_src_reg(alu->src + i); + if (reg == chosen) { + move_alu = ppir_update_spilled_src(comp, block, node, + alu->src + i, move_alu); + } + } + break; + } + case ppir_node_type_store: + { + ppir_store_node *store = ppir_node_to_store(node); + reg = get_src_reg(&store->src); + if (reg == chosen) { + ppir_update_spilled_src(comp, block, node, &store->src, NULL); + } + break; + } + case ppir_node_type_load: + { + ppir_load_node *load = ppir_node_to_load(node); + reg = get_src_reg(&load->src); + if (reg == chosen) { + ppir_update_spilled_src(comp, block, node, &load->src, NULL); + } + break; + } + case ppir_node_type_load_texture: + { + ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node); + reg = get_src_reg(&load_tex->src_coords); + if (reg == chosen) { + ppir_update_spilled_src(comp, block, node, &load_tex->src_coords, + NULL); + } + break; + } + default: + break; + } + } + } + + return true; +} + +static ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp, + struct ra_graph *g) +{ + int max_range = -1; + ppir_reg *chosen = NULL; + + list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { + int range = reg->live_out - reg->live_in; + + if (!reg->spilled && reg->live_out != INT_MAX && range > max_range) { + chosen = reg; + max_range = range; + } + } + + if (chosen) + chosen->spilled = true; + + return chosen; +} + +static void ppir_regalloc_reset_liveness_info(ppir_compiler *comp) +{ + list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { + reg->live_in = INT_MAX; + reg->live_out = 0; + } +} + +int lima_ppir_force_spilling = 0; + +static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled) +{ + ppir_reg *end_reg; + + ppir_regalloc_reset_liveness_info(comp); + end_reg = ppir_regalloc_build_liveness_info(comp); + + struct ra_graph *g = ra_alloc_interference_graph( + comp->ra, list_length(&comp->reg_list)); + + int n = 0, end_reg_index = 0; + list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { + int c = ppir_ra_reg_class_vec1 + (reg->num_components - 1); + if (reg->is_head) + c += 4; + if (reg == end_reg) + end_reg_index = n; + ra_set_node_class(g, n++, c); + } + + int n1 = 0; + list_for_each_entry(ppir_reg, reg1, &comp->reg_list, list) { + int n2 = n1 + 1; + list_for_each_entry_from(ppir_reg, reg2, reg1->list.next, + &comp->reg_list, list) { + bool interference = false; + if (reg1->live_in < reg2->live_in) { + if (reg1->live_out > reg2->live_in) + interference = true; + } + else if (reg1->live_in > reg2->live_in) { + if (reg2->live_out > reg1->live_in) + interference = true; + } + else + interference = true; + + if (interference) + ra_add_node_interference(g, n1, n2); + + n2++; + } + n1++; + } + + ra_set_node_reg(g, end_reg_index, ppir_ra_reg_base[ppir_ra_reg_class_vec4]); + + *spilled = false; + bool ok = ra_allocate(g); + if (!ok || (comp->force_spilling-- > 0)) { + ppir_reg *chosen = ppir_regalloc_choose_spill_node(comp, g); + if (chosen) { + /* stack_size will be used to assemble the frame reg in lima_draw. + * It is also be used in the spilling code, as negative indices + * starting from -1, to create stack addresses. */ + comp->prog->stack_size++; + ppir_regalloc_spill_reg(comp, chosen); + /* Ask the outer loop to call back in. 
*/ + *spilled = true; + + ppir_debug("ppir: spilled register\n"); + goto err_out; + } + + ppir_error("ppir: regalloc fail\n"); + goto err_out; + } + + n = 0; + list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { + int reg_index = ra_get_node_reg(g, n++); + reg->index = get_phy_reg_index(reg_index); + } + + ralloc_free(g); + + if (lima_debug & LIMA_DEBUG_PP) + ppir_regalloc_print_result(comp); + + return true; + +err_out: + ralloc_free(g); + return false; +} + +bool ppir_regalloc_prog(ppir_compiler *comp) +{ + bool spilled = false; + comp->prog->stack_size = 0; + + /* Set from an environment variable to force spilling + * for debugging purposes, see lima_screen.c */ + comp->force_spilling = lima_ppir_force_spilling; + + ppir_regalloc_update_reglist_ssa(comp); + + /* this will most likely succeed in the first + * try, except for very complicated shaders */ + while (!ppir_regalloc_prog_try(comp, &spilled)) + if (!spilled) + return false; + + return true; +} diff --git a/src/gallium/drivers/lima/ir/pp/scheduler.c b/src/gallium/drivers/lima/ir/pp/scheduler.c new file mode 100644 index 00000000000..721504a810e --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/scheduler.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
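A sketch of the allocate/spill/retry control flow implemented by ppir_regalloc_prog above; try_alloc and the pressure counter are made-up stand-ins for ppir_regalloc_prog_try and the real register demand. Each failed attempt either spills one value and retries, or reports a hard failure.

#include <stdbool.h>
#include <stdio.h>

static int pressure = 6;          /* pretend register demand        */
static const int num_regs = 4;    /* pretend number of hw registers */

static bool try_alloc(bool *spilled)
{
   if (pressure <= num_regs) {
      *spilled = false;
      return true;                /* coloring succeeded */
   }
   pressure--;                    /* "spill" one value to memory */
   *spilled = true;
   return false;                  /* tell the caller to retry */
}

int main(void)
{
   bool spilled = false;

   while (!try_alloc(&spilled))
      if (!spilled)
         return 1;                /* failed and nothing left to spill */

   printf("allocated after spilling down to %d live values\n", pressure);
   return 0;
}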
+ * + */ + +#include + +#include "ppir.h" + + +static void ppir_schedule_calc_sched_info(ppir_instr *instr) +{ + int n = 0; + float extra_reg = 1.0; + + /* update all children's sched info */ + ppir_instr_foreach_pred(instr, dep) { + ppir_instr *pred = dep->pred; + + if (pred->reg_pressure < 0) + ppir_schedule_calc_sched_info(pred); + + if (instr->est < pred->est + 1) + instr->est = pred->est + 1; + + float reg_weight = 1.0 - 1.0 / list_length(&pred->succ_list); + if (extra_reg > reg_weight) + extra_reg = reg_weight; + + n++; + } + + /* leaf instr */ + if (!n) { + instr->reg_pressure = 0; + return; + } + + int i = 0, reg[n]; + ppir_instr_foreach_pred(instr, dep) { + ppir_instr *pred = dep->pred; + reg[i++] = pred->reg_pressure; + } + + /* sort */ + for (i = 0; i < n - 1; i++) { + for (int j = 0; j < n - i - 1; j++) { + if (reg[j] > reg[j + 1]) { + int tmp = reg[j + 1]; + reg[j + 1] = reg[j]; + reg[j] = tmp; + } + } + } + + for (i = 0; i < n; i++) { + int pressure = reg[i] + n - (i + 1); + if (pressure > instr->reg_pressure) + instr->reg_pressure = pressure; + } + + /* If all children of this instr have multi parents, then this + * instr need an extra reg to store its result. For example, + * it's not fair for parent has the same reg pressure as child + * if n==1 and child's successor>1, because we need 2 reg for + * this. + * + * But we can't add a full reg to the reg_pressure, because the + * last parent of a multi-successor child doesn't need an extra + * reg. For example, a single child (with multi successor) instr + * should has less reg pressure than a two children (with single + * successor) instr. + * + * extra reg = min(all child)(1.0 - 1.0 / num successor) + */ + instr->reg_pressure += extra_reg; +} + +static void ppir_insert_ready_list(struct list_head *ready_list, + ppir_instr *insert_instr) +{ + struct list_head *insert_pos = ready_list; + + list_for_each_entry(ppir_instr, instr, ready_list, list) { + if (insert_instr->parent_index < instr->parent_index || + (insert_instr->parent_index == instr->parent_index && + (insert_instr->reg_pressure < instr->reg_pressure || + (insert_instr->reg_pressure == instr->reg_pressure && + (insert_instr->est >= instr->est))))) { + insert_pos = &instr->list; + break; + } + } + + list_del(&insert_instr->list); + list_addtail(&insert_instr->list, insert_pos); +} + +static void ppir_schedule_ready_list(ppir_block *block, + struct list_head *ready_list) +{ + if (list_empty(ready_list)) + return; + + ppir_instr *instr = list_first_entry(ready_list, ppir_instr, list); + list_del(&instr->list); + + /* schedule the instr to the block instr list */ + list_add(&instr->list, &block->instr_list); + instr->scheduled = true; + block->sched_instr_index--; + instr->seq = block->sched_instr_base + block->sched_instr_index; + + ppir_instr_foreach_pred(instr, dep) { + ppir_instr *pred = dep->pred; + pred->parent_index = block->sched_instr_index; + + bool ready = true; + ppir_instr_foreach_succ(pred, dep) { + ppir_instr *succ = dep->succ; + if (!succ->scheduled) { + ready = false; + break; + } + } + /* all successor have been scheduled */ + if (ready) + ppir_insert_ready_list(ready_list, pred); + } + + ppir_schedule_ready_list(block, ready_list); +} + +/* Register sensitive schedule algorithm from paper: + * "Register-Sensitive Selection, Duplication, and Sequencing of Instructions" + * Author: Vivek Sarkar, Mauricio J. Serrano, Barbara B. 
Simons + */ +static void ppir_schedule_block(ppir_block *block) +{ + /* move all instr to instr_list, block->instr_list will + * contain schedule result */ + struct list_head instr_list; + list_replace(&block->instr_list, &instr_list); + list_inithead(&block->instr_list); + + /* step 2 & 3 */ + list_for_each_entry(ppir_instr, instr, &instr_list, list) { + if (ppir_instr_is_root(instr)) + ppir_schedule_calc_sched_info(instr); + block->sched_instr_index++; + } + block->sched_instr_base = block->comp->sched_instr_base; + block->comp->sched_instr_base += block->sched_instr_index; + + /* step 4 */ + struct list_head ready_list; + list_inithead(&ready_list); + + /* step 5 */ + list_for_each_entry_safe(ppir_instr, instr, &instr_list, list) { + if (ppir_instr_is_root(instr)) { + instr->parent_index = INT_MAX; + ppir_insert_ready_list(&ready_list, instr); + } + } + + /* step 6 */ + ppir_schedule_ready_list(block, &ready_list); +} + +bool ppir_schedule_prog(ppir_compiler *comp) +{ + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + ppir_schedule_block(block); + } + + return true; +} diff --git a/src/gallium/drivers/lima/lima_bo.c b/src/gallium/drivers/lima/lima_bo.c new file mode 100644 index 00000000000..1d6dd720602 --- /dev/null +++ b/src/gallium/drivers/lima/lima_bo.c @@ -0,0 +1,337 @@ +/* + * Copyright (C) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
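A standalone recomputation of the reg_pressure estimate from ppir_schedule_calc_sched_info above (Sethi-Ullman style; node_pressure is a hypothetical helper): child pressures are sorted ascending, and the parent needs the maximum over i of the i-th child's pressure plus the number of children still pending after it.

#include <stdio.h>
#include <stdlib.h>

static int cmp_int(const void *a, const void *b)
{
   return *(const int *)a - *(const int *)b;
}

static int node_pressure(int *child_pressure, int n)
{
   if (n == 0)
      return 0;                         /* leaf needs no extra registers */

   qsort(child_pressure, n, sizeof(int), cmp_int);

   int pressure = 0;
   for (int i = 0; i < n; i++) {
      int p = child_pressure[i] + n - (i + 1);   /* i-th child plus the ones still pending */
      if (p > pressure)
         pressure = p;
   }
   return pressure;
}

int main(void)
{
   int kids[] = { 1, 3, 2 };
   printf("%d\n", node_pressure(kids, 3));   /* prints 3 */
   return 0;
}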
+ * + */ + +#include +#include +#include +#include + +#include "xf86drm.h" +#include "drm-uapi/lima_drm.h" + +#include "util/u_hash_table.h" +#include "util/os_time.h" +#include "os/os_mman.h" + +#include "state_tracker/drm_driver.h" + +#include "lima_screen.h" +#include "lima_bo.h" + +#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) + +static unsigned handle_hash(void *key) +{ + return PTR_TO_UINT(key); +} + +static int handle_compare(void *key1, void *key2) +{ + return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); +} + +bool lima_bo_table_init(struct lima_screen *screen) +{ + screen->bo_handles = util_hash_table_create(handle_hash, handle_compare); + if (!screen->bo_handles) + return false; + + screen->bo_flink_names = util_hash_table_create(handle_hash, handle_compare); + if (!screen->bo_flink_names) + goto err_out0; + + mtx_init(&screen->bo_table_lock, mtx_plain); + return true; + +err_out0: + util_hash_table_destroy(screen->bo_handles); + return false; +} + +void lima_bo_table_fini(struct lima_screen *screen) +{ + mtx_destroy(&screen->bo_table_lock); + util_hash_table_destroy(screen->bo_handles); + util_hash_table_destroy(screen->bo_flink_names); +} + +static void lima_close_kms_handle(struct lima_screen *screen, uint32_t handle) +{ + struct drm_gem_close args = { + .handle = handle, + }; + + drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &args); +} + +static bool lima_bo_get_info(struct lima_bo *bo) +{ + struct drm_lima_gem_info req = { + .handle = bo->handle, + }; + + if(drmIoctl(bo->screen->fd, DRM_IOCTL_LIMA_GEM_INFO, &req)) + return false; + + bo->offset = req.offset; + bo->va = req.va; + return true; +} + +struct lima_bo *lima_bo_create(struct lima_screen *screen, + uint32_t size, uint32_t flags) +{ + struct lima_bo *bo; + struct drm_lima_gem_create req = { + .size = size, + .flags = flags, + }; + + if (!(bo = calloc(1, sizeof(*bo)))) + return NULL; + + if (drmIoctl(screen->fd, DRM_IOCTL_LIMA_GEM_CREATE, &req)) + goto err_out0; + + bo->screen = screen; + bo->size = req.size; + bo->handle = req.handle; + p_atomic_set(&bo->refcnt, 1); + + if (!lima_bo_get_info(bo)) + goto err_out1; + + return bo; + +err_out1: + lima_close_kms_handle(screen, bo->handle); +err_out0: + free(bo); + return NULL; +} + +void lima_bo_free(struct lima_bo *bo) +{ + if (!p_atomic_dec_zero(&bo->refcnt)) + return; + + struct lima_screen *screen = bo->screen; + mtx_lock(&screen->bo_table_lock); + util_hash_table_remove(screen->bo_handles, + (void *)(uintptr_t)bo->handle); + if (bo->flink_name) + util_hash_table_remove(screen->bo_flink_names, + (void *)(uintptr_t)bo->flink_name); + mtx_unlock(&screen->bo_table_lock); + + if (bo->map) + lima_bo_unmap(bo); + + lima_close_kms_handle(screen, bo->handle); + free(bo); +} + +void *lima_bo_map(struct lima_bo *bo) +{ + if (!bo->map) { + bo->map = os_mmap(0, bo->size, PROT_READ | PROT_WRITE, + MAP_SHARED, bo->screen->fd, bo->offset); + if (bo->map == MAP_FAILED) + bo->map = NULL; + } + + return bo->map; +} + +void lima_bo_unmap(struct lima_bo *bo) +{ + if (bo->map) { + os_munmap(bo->map, bo->size); + bo->map = NULL; + } +} + +bool lima_bo_export(struct lima_bo *bo, struct winsys_handle *handle) +{ + struct lima_screen *screen = bo->screen; + + switch (handle->type) { + case WINSYS_HANDLE_TYPE_SHARED: + if (!bo->flink_name) { + struct drm_gem_flink flink = { + .handle = bo->handle, + .name = 0, + }; + if (drmIoctl(screen->fd, DRM_IOCTL_GEM_FLINK, &flink)) + return false; + + bo->flink_name = flink.name; + + mtx_lock(&screen->bo_table_lock); + 
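A minimal sketch of the buffer-object reference counting used above in lima_bo_create/lima_bo_free, written with C11 atomics instead of Mesa's p_atomic helpers (illustrative only): whichever caller drops the count to zero frees the object.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj { atomic_int refcnt; };

static struct obj *obj_create(void)
{
   struct obj *o = calloc(1, sizeof(*o));
   if (o)
      atomic_store(&o->refcnt, 1);
   return o;
}

static void obj_ref(struct obj *o)
{
   atomic_fetch_add(&o->refcnt, 1);
}

static void obj_unref(struct obj *o)
{
   if (atomic_fetch_sub(&o->refcnt, 1) == 1) {   /* we held the last reference */
      printf("freeing\n");
      free(o);
   }
}

int main(void)
{
   struct obj *o = obj_create();
   obj_ref(o);      /* refcnt: 2 */
   obj_unref(o);    /* refcnt: 1 */
   obj_unref(o);    /* refcnt: 0, freed */
   return 0;
}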
util_hash_table_set(screen->bo_flink_names, + (void *)(uintptr_t)bo->flink_name, bo); + mtx_unlock(&screen->bo_table_lock); + } + handle->handle = bo->flink_name; + return true; + + case WINSYS_HANDLE_TYPE_KMS: + mtx_lock(&screen->bo_table_lock); + util_hash_table_set(screen->bo_handles, + (void *)(uintptr_t)bo->handle, bo); + mtx_unlock(&screen->bo_table_lock); + + handle->handle = bo->handle; + return true; + + case WINSYS_HANDLE_TYPE_FD: + if (drmPrimeHandleToFD(screen->fd, bo->handle, DRM_CLOEXEC, + (int*)&handle->handle)) + return false; + + mtx_lock(&screen->bo_table_lock); + util_hash_table_set(screen->bo_handles, + (void *)(uintptr_t)bo->handle, bo); + mtx_unlock(&screen->bo_table_lock); + return true; + + default: + return false; + } +} + +struct lima_bo *lima_bo_import(struct lima_screen *screen, + struct winsys_handle *handle) +{ + struct lima_bo *bo = NULL; + struct drm_gem_open req = {0}; + uint32_t dma_buf_size = 0; + unsigned h = handle->handle; + + mtx_lock(&screen->bo_table_lock); + + /* Convert a DMA buf handle to a KMS handle now. */ + if (handle->type == WINSYS_HANDLE_TYPE_FD) { + uint32_t prime_handle; + off_t size; + + /* Get a KMS handle. */ + if (drmPrimeFDToHandle(screen->fd, h, &prime_handle)) { + mtx_unlock(&screen->bo_table_lock); + return NULL; + } + + /* Query the buffer size. */ + size = lseek(h, 0, SEEK_END); + if (size == (off_t)-1) { + mtx_unlock(&screen->bo_table_lock); + lima_close_kms_handle(screen, prime_handle); + return NULL; + } + lseek(h, 0, SEEK_SET); + + dma_buf_size = size; + h = prime_handle; + } + + switch (handle->type) { + case WINSYS_HANDLE_TYPE_SHARED: + bo = util_hash_table_get(screen->bo_flink_names, + (void *)(uintptr_t)h); + break; + case WINSYS_HANDLE_TYPE_KMS: + case WINSYS_HANDLE_TYPE_FD: + bo = util_hash_table_get(screen->bo_handles, + (void *)(uintptr_t)h); + break; + default: + mtx_unlock(&screen->bo_table_lock); + return NULL; + } + + if (bo) { + p_atomic_inc(&bo->refcnt); + mtx_unlock(&screen->bo_table_lock); + return bo; + } + + if (!(bo = calloc(1, sizeof(*bo)))) { + mtx_unlock(&screen->bo_table_lock); + if (handle->type == WINSYS_HANDLE_TYPE_FD) + lima_close_kms_handle(screen, h); + return NULL; + } + + bo->screen = screen; + p_atomic_set(&bo->refcnt, 1); + + switch (handle->type) { + case WINSYS_HANDLE_TYPE_SHARED: + req.name = h; + if (drmIoctl(screen->fd, DRM_IOCTL_GEM_OPEN, &req)) { + mtx_unlock(&screen->bo_table_lock); + free(bo); + return NULL; + } + bo->handle = req.handle; + bo->flink_name = h; + bo->size = req.size; + break; + case WINSYS_HANDLE_TYPE_FD: + bo->handle = h; + bo->size = dma_buf_size; + break; + default: + /* not possible */ + assert(0); + } + + if (lima_bo_get_info(bo)) { + if (handle->type == WINSYS_HANDLE_TYPE_SHARED) + util_hash_table_set(screen->bo_flink_names, + (void *)(uintptr_t)bo->flink_name, bo); + util_hash_table_set(screen->bo_handles, + (void*)(uintptr_t)bo->handle, bo); + } + else { + lima_close_kms_handle(screen, bo->handle); + free(bo); + bo = NULL; + } + + mtx_unlock(&screen->bo_table_lock); + + return bo; +} + +bool lima_bo_wait(struct lima_bo *bo, uint32_t op, uint64_t timeout_ns) +{ + int64_t abs_timeout = os_time_get_absolute_timeout(timeout_ns); + struct drm_lima_gem_wait req = { + .handle = bo->handle, + .op = op, + .timeout_ns = abs_timeout, + }; + + return drmIoctl(bo->screen->fd, DRM_IOCTL_LIMA_GEM_WAIT, &req) == 0; +} diff --git a/src/gallium/drivers/lima/lima_bo.h b/src/gallium/drivers/lima/lima_bo.h new file mode 100644 index 00000000000..3f440b3b014 --- /dev/null +++ 
b/src/gallium/drivers/lima/lima_bo.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2018-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef H_LIMA_BO +#define H_LIMA_BO + +#include +#include + +#include "util/u_atomic.h" + +struct lima_bo { + struct lima_screen *screen; + int refcnt; + + uint32_t size; + uint32_t handle; + uint64_t offset; + uint32_t flink_name; + + void *map; + uint32_t va; +}; + +bool lima_bo_table_init(struct lima_screen *screen); +void lima_bo_table_fini(struct lima_screen *screen); + +struct lima_bo *lima_bo_create(struct lima_screen *screen, uint32_t size, + uint32_t flags); +void lima_bo_free(struct lima_bo *bo); + +static inline void lima_bo_reference(struct lima_bo *bo) +{ + p_atomic_inc(&bo->refcnt); +} + +void *lima_bo_map(struct lima_bo *bo); +void lima_bo_unmap(struct lima_bo *bo); + +bool lima_bo_export(struct lima_bo *bo, struct winsys_handle *handle); +struct lima_bo *lima_bo_import(struct lima_screen *screen, + struct winsys_handle *handle); + +bool lima_bo_wait(struct lima_bo *bo, uint32_t op, uint64_t timeout_ns); + +#endif diff --git a/src/gallium/drivers/lima/lima_context.c b/src/gallium/drivers/lima/lima_context.c new file mode 100644 index 00000000000..35bf8c89a9c --- /dev/null +++ b/src/gallium/drivers/lima/lima_context.c @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" +#include "util/u_math.h" +#include "util/u_debug.h" +#include "util/ralloc.h" +#include "util/u_inlines.h" +#include "util/u_suballoc.h" +#include "util/hash_table.h" + +#include "lima_screen.h" +#include "lima_context.h" +#include "lima_resource.h" +#include "lima_bo.h" +#include "lima_submit.h" +#include "lima_util.h" +#include "lima_fence.h" + +#include +#include + +int lima_ctx_num_plb = LIMA_CTX_PLB_DEF_NUM; + +uint32_t +lima_ctx_buff_va(struct lima_context *ctx, enum lima_ctx_buff buff, unsigned submit) +{ + struct lima_ctx_buff_state *cbs = ctx->buffer_state + buff; + struct lima_resource *res = lima_resource(cbs->res); + + if (submit & LIMA_CTX_BUFF_SUBMIT_GP) + lima_submit_add_bo(ctx->gp_submit, res->bo, LIMA_SUBMIT_BO_READ); + if (submit & LIMA_CTX_BUFF_SUBMIT_PP) + lima_submit_add_bo(ctx->pp_submit, res->bo, LIMA_SUBMIT_BO_READ); + + return res->bo->va + cbs->offset; +} + +void * +lima_ctx_buff_map(struct lima_context *ctx, enum lima_ctx_buff buff) +{ + struct lima_ctx_buff_state *cbs = ctx->buffer_state + buff; + struct lima_resource *res = lima_resource(cbs->res); + + return lima_bo_map(res->bo) + cbs->offset; +} + +void * +lima_ctx_buff_alloc(struct lima_context *ctx, enum lima_ctx_buff buff, + unsigned size, bool uploader) +{ + struct lima_ctx_buff_state *cbs = ctx->buffer_state + buff; + void *ret = NULL; + + cbs->size = align(size, 0x40); + + if (uploader) + u_upload_alloc(ctx->uploader, 0, cbs->size, 0x40, &cbs->offset, + &cbs->res, &ret); + else + u_suballocator_alloc(ctx->suballocator, cbs->size, 0x10, + &cbs->offset, &cbs->res); + + return ret; +} + +static int +lima_context_create_drm_ctx(struct lima_screen *screen) +{ + struct drm_lima_ctx_create req = {0}; + + int ret = drmIoctl(screen->fd, DRM_IOCTL_LIMA_CTX_CREATE, &req); + if (ret) + return errno; + + return req.id; +} + +static void +lima_context_free_drm_ctx(struct lima_screen *screen, int id) +{ + struct drm_lima_ctx_free req = { + .id = id, + }; + + drmIoctl(screen->fd, DRM_IOCTL_LIMA_CTX_FREE, &req); +} + +static void +lima_context_destroy(struct pipe_context *pctx) +{ + struct lima_context *ctx = lima_context(pctx); + struct lima_screen *screen = lima_screen(pctx->screen); + + if (ctx->pp_submit) + lima_submit_free(ctx->pp_submit); + if (ctx->gp_submit) + lima_submit_free(ctx->gp_submit); + + for (int i = 0; i < lima_ctx_buff_num; i++) + pipe_resource_reference(&ctx->buffer_state[i].res, NULL); + + lima_state_fini(ctx); + + if (ctx->suballocator) + u_suballocator_destroy(ctx->suballocator); + + if (ctx->uploader) + u_upload_destroy(ctx->uploader); + + slab_destroy_child(&ctx->transfer_pool); + + for (int i = 0; i < LIMA_CTX_PLB_MAX_NUM; i++) { + if (ctx->plb[i]) + lima_bo_free(ctx->plb[i]); + } + + if (ctx->plb_gp_stream) + lima_bo_free(ctx->plb_gp_stream); + + if (ctx->plb_pp_stream) + assert(!_mesa_hash_table_num_entries(ctx->plb_pp_stream)); + + lima_context_free_drm_ctx(screen, ctx->id); + + ralloc_free(ctx); +} + +static uint32_t +plb_pp_stream_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct lima_ctx_plb_pp_stream_key)); +} + +static bool +plb_pp_stream_compare(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(struct lima_ctx_plb_pp_stream_key)) == 0; +} + +struct pipe_context * +lima_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +{ + struct lima_screen *screen = lima_screen(pscreen); + struct lima_context *ctx; + + ctx = rzalloc(screen, struct lima_context); + 
if (!ctx) + return NULL; + + ctx->id = lima_context_create_drm_ctx(screen); + if (ctx->id < 0) { + ralloc_free(ctx); + return NULL; + } + + ctx->base.screen = pscreen; + ctx->base.destroy = lima_context_destroy; + + lima_resource_context_init(ctx); + lima_fence_context_init(ctx); + lima_state_init(ctx); + lima_draw_init(ctx); + lima_program_init(ctx); + lima_query_init(ctx); + + slab_create_child(&ctx->transfer_pool, &screen->transfer_pool); + + ctx->uploader = u_upload_create_default(&ctx->base); + if (!ctx->uploader) + goto err_out; + ctx->base.stream_uploader = ctx->uploader; + ctx->base.const_uploader = ctx->uploader; + + /* for varying output which need not mmap */ + ctx->suballocator = + u_suballocator_create(&ctx->base, 1024 * 1024, 0, + PIPE_USAGE_STREAM, 0, false); + if (!ctx->suballocator) + goto err_out; + + util_dynarray_init(&ctx->vs_cmd_array, ctx); + util_dynarray_init(&ctx->plbu_cmd_array, ctx); + + if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI450) + ctx->plb_max_blk = 4096; + else + ctx->plb_max_blk = 512; + ctx->plb_size = ctx->plb_max_blk * LIMA_CTX_PLB_BLK_SIZE; + ctx->plb_gp_size = ctx->plb_max_blk * 4; + + for (int i = 0; i < lima_ctx_num_plb; i++) { + ctx->plb[i] = lima_bo_create(screen, ctx->plb_size, 0); + if (!ctx->plb[i]) + goto err_out; + } + + unsigned plb_gp_stream_size = + align(ctx->plb_gp_size * lima_ctx_num_plb, LIMA_PAGE_SIZE); + ctx->plb_gp_stream = + lima_bo_create(screen, plb_gp_stream_size, 0); + if (!ctx->plb_gp_stream) + goto err_out; + lima_bo_map(ctx->plb_gp_stream); + + /* plb gp stream is static for any framebuffer */ + for (int i = 0; i < lima_ctx_num_plb; i++) { + uint32_t *plb_gp_stream = ctx->plb_gp_stream->map + i * ctx->plb_gp_size; + for (int j = 0; j < ctx->plb_max_blk; j++) + plb_gp_stream[j] = ctx->plb[i]->va + LIMA_CTX_PLB_BLK_SIZE * j; + } + + if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) { + ctx->plb_pp_stream = _mesa_hash_table_create( + ctx, plb_pp_stream_hash, plb_pp_stream_compare); + if (!ctx->plb_pp_stream) + goto err_out; + } + + ctx->gp_submit = lima_submit_create(ctx, LIMA_PIPE_GP); + if (!ctx->gp_submit) + goto err_out; + + ctx->pp_submit = lima_submit_create(ctx, LIMA_PIPE_PP); + if (!ctx->pp_submit) + goto err_out; + + return &ctx->base; + +err_out: + lima_context_destroy(&ctx->base); + return NULL; +} + +bool +lima_need_flush(struct lima_context *ctx, struct lima_bo *bo, bool write) +{ + return lima_submit_has_bo(ctx->gp_submit, bo, write) || + lima_submit_has_bo(ctx->pp_submit, bo, write); +} diff --git a/src/gallium/drivers/lima/lima_context.h b/src/gallium/drivers/lima/lima_context.h new file mode 100644 index 00000000000..6b2612d75a7 --- /dev/null +++ b/src/gallium/drivers/lima/lima_context.h @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
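A standalone illustration of the static GP PLB stream filled above in lima_context_create: per PLB buffer the stream is just a table of 32-bit GPU addresses, one entry per 512-byte heap block (the base address below is made up).

#include <stdint.h>
#include <stdio.h>

#define BLK_SIZE 512   /* LIMA_CTX_PLB_BLK_SIZE */

static void fill_plb_gp_stream(uint32_t *stream, uint32_t plb_va,
                               unsigned num_blocks)
{
   for (unsigned j = 0; j < num_blocks; j++)
      stream[j] = plb_va + BLK_SIZE * j;   /* GPU address of heap block j */
}

int main(void)
{
   uint32_t stream[4];

   fill_plb_gp_stream(stream, 0x10000000u, 4);   /* made-up PLB base address */
   for (int j = 0; j < 4; j++)
      printf("0x%08x\n", (unsigned)stream[j]);
   return 0;
}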
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef H_LIMA_CONTEXT +#define H_LIMA_CONTEXT + +#include "util/slab.h" +#include "util/u_dynarray.h" + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +struct lima_context_framebuffer { + struct pipe_surface *cbuf, *zsbuf; + int width, height; + int tiled_w, tiled_h; + int shift_w, shift_h; + int block_w, block_h; + int shift_min; + int samples; +}; + +struct lima_context_clear { + unsigned buffers; + uint32_t color_8pc; + uint32_t depth; + uint32_t stencil; + uint64_t color_16pc; +}; + +struct lima_depth_stencil_alpha_state { + struct pipe_depth_stencil_alpha_state base; +}; + +struct lima_fs_shader_state { + void *shader; + int shader_size; + int stack_size; + struct lima_bo *bo; +}; + +#define LIMA_MAX_VARYING_NUM 13 + +struct lima_varying_info { + int components; + int component_size; + int offset; +}; + +struct lima_vs_shader_state { + void *shader; + int shader_size; + int prefetch; + + /* pipe_constant_buffer.size is aligned with some pad bytes, + * so record here for the real start place of gpir lowered + * uniforms */ + int uniform_pending_offset; + + void *constant; + int constant_size; + + struct lima_varying_info varying[LIMA_MAX_VARYING_NUM]; + int varying_stride; + int num_varying; + + struct lima_bo *bo; +}; + +struct lima_rasterizer_state { + struct pipe_rasterizer_state base; +}; + +struct lima_blend_state { + struct pipe_blend_state base; +}; + +struct lima_vertex_element_state { + struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; + unsigned num_elements; +}; + +struct lima_context_vertex_buffer { + struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; + unsigned count; + uint32_t enabled_mask; +}; + +struct lima_context_viewport_state { + struct pipe_viewport_state transform; + float x, y, width, height; + float near, far; +}; + +struct lima_context_constant_buffer { + const void *buffer; + uint32_t size; + bool dirty; +}; + +enum lima_ctx_buff { + lima_ctx_buff_sh_varying, + lima_ctx_buff_sh_gl_pos, + lima_ctx_buff_gp_varying_info, + lima_ctx_buff_gp_attribute_info, + lima_ctx_buff_gp_uniform, + lima_ctx_buff_gp_vs_cmd, + lima_ctx_buff_gp_plbu_cmd, + lima_ctx_buff_pp_plb_rsw, + lima_ctx_buff_pp_uniform_array, + lima_ctx_buff_pp_uniform, + lima_ctx_buff_pp_tex_desc, + lima_ctx_buff_num, +}; + +struct lima_ctx_buff_state { + struct pipe_resource *res; + unsigned offset; + unsigned size; +}; + +struct lima_texture_stateobj { + struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; + unsigned num_textures; + struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + unsigned num_samplers; +}; + +struct lima_ctx_plb_pp_stream_key { + uint32_t plb_index; + uint32_t tiled_w; + uint32_t tiled_h; +}; + +struct lima_ctx_plb_pp_stream { + struct lima_ctx_plb_pp_stream_key key; + uint32_t refcnt; + struct lima_bo *bo; + uint32_t offset[4]; +}; + +struct lima_damage_state { + struct pipe_scissor_state *region; + unsigned num_region; + bool aligned; +}; + +struct lima_pp_stream_state { + struct lima_bo *bo; + uint32_t bo_offset; + uint32_t offset[8]; +}; + +struct lima_context { + 
struct pipe_context base; + + enum { + LIMA_CONTEXT_DIRTY_FRAMEBUFFER = (1 << 0), + LIMA_CONTEXT_DIRTY_CLEAR = (1 << 1), + LIMA_CONTEXT_DIRTY_SHADER_VERT = (1 << 2), + LIMA_CONTEXT_DIRTY_SHADER_FRAG = (1 << 3), + LIMA_CONTEXT_DIRTY_VERTEX_ELEM = (1 << 4), + LIMA_CONTEXT_DIRTY_VERTEX_BUFF = (1 << 5), + LIMA_CONTEXT_DIRTY_VIEWPORT = (1 << 6), + LIMA_CONTEXT_DIRTY_SCISSOR = (1 << 7), + LIMA_CONTEXT_DIRTY_RASTERIZER = (1 << 8), + LIMA_CONTEXT_DIRTY_ZSA = (1 << 9), + LIMA_CONTEXT_DIRTY_BLEND_COLOR = (1 << 10), + LIMA_CONTEXT_DIRTY_BLEND = (1 << 11), + LIMA_CONTEXT_DIRTY_STENCIL_REF = (1 << 12), + LIMA_CONTEXT_DIRTY_CONST_BUFF = (1 << 13), + LIMA_CONTEXT_DIRTY_TEXTURES = (1 << 14), + } dirty; + + struct u_upload_mgr *uploader; + struct u_suballocator *suballocator; + + struct slab_child_pool transfer_pool; + + struct lima_context_framebuffer framebuffer; + struct lima_context_viewport_state viewport; + struct pipe_scissor_state scissor; + struct lima_context_clear clear; + struct lima_vs_shader_state *vs; + struct lima_fs_shader_state *fs; + struct lima_vertex_element_state *vertex_elements; + struct lima_context_vertex_buffer vertex_buffers; + struct lima_rasterizer_state *rasterizer; + struct lima_depth_stencil_alpha_state *zsa; + struct pipe_blend_color blend_color; + struct lima_blend_state *blend; + struct pipe_stencil_ref stencil_ref; + struct lima_context_constant_buffer const_buffer[PIPE_SHADER_TYPES]; + struct lima_texture_stateobj tex_stateobj; + struct lima_damage_state damage; + struct lima_pp_stream_state pp_stream; + + unsigned min_index; + unsigned max_index; + + #define LIMA_CTX_PLB_MIN_NUM 1 + #define LIMA_CTX_PLB_MAX_NUM 4 + #define LIMA_CTX_PLB_DEF_NUM 2 + #define LIMA_CTX_PLB_BLK_SIZE 512 + unsigned plb_max_blk; + unsigned plb_size; + unsigned plb_gp_size; + + struct lima_bo *plb[LIMA_CTX_PLB_MAX_NUM]; + struct lima_bo *plb_gp_stream; + struct hash_table *plb_pp_stream; + uint32_t plb_index; + + struct lima_ctx_buff_state buffer_state[lima_ctx_buff_num]; + + struct util_dynarray vs_cmd_array; + struct util_dynarray plbu_cmd_array; + + struct lima_submit *gp_submit; + struct lima_submit *pp_submit; + + int id; +}; + +static inline struct lima_context * +lima_context(struct pipe_context *pctx) +{ + return (struct lima_context *)pctx; +} + +struct lima_sampler_state { + struct pipe_sampler_state base; +}; + +static inline struct lima_sampler_state * +lima_sampler_state(struct pipe_sampler_state *psstate) +{ + return (struct lima_sampler_state *)psstate; +} + +struct lima_sampler_view { + struct pipe_sampler_view base; +}; + +static inline struct lima_sampler_view * +lima_sampler_view(struct pipe_sampler_view *psview) +{ + return (struct lima_sampler_view *)psview; +} + +#define LIMA_CTX_BUFF_SUBMIT_GP (1 << 0) +#define LIMA_CTX_BUFF_SUBMIT_PP (1 << 1) + +uint32_t lima_ctx_buff_va(struct lima_context *ctx, enum lima_ctx_buff buff, + unsigned submit); +void *lima_ctx_buff_map(struct lima_context *ctx, enum lima_ctx_buff buff); +void *lima_ctx_buff_alloc(struct lima_context *ctx, enum lima_ctx_buff buff, + unsigned size, bool uploader); + +void lima_state_init(struct lima_context *ctx); +void lima_state_fini(struct lima_context *ctx); +void lima_draw_init(struct lima_context *ctx); +void lima_program_init(struct lima_context *ctx); +void lima_query_init(struct lima_context *ctx); + +struct pipe_context * +lima_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); + +void lima_flush(struct lima_context *ctx); + +bool lima_need_flush(struct lima_context *ctx, 
struct lima_bo *bo, bool write); + +#endif diff --git a/src/gallium/drivers/lima/lima_draw.c b/src/gallium/drivers/lima/lima_draw.c new file mode 100644 index 00000000000..bbc9764be0d --- /dev/null +++ b/src/gallium/drivers/lima/lima_draw.c @@ -0,0 +1,1648 @@ +/* + * Copyright (c) 2011-2013 Luc Verhaegen + * Copyright (c) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "util/u_math.h" +#include "util/u_format.h" +#include "util/u_debug.h" +#include "util/u_half.h" +#include "util/u_helpers.h" +#include "util/u_inlines.h" +#include "util/u_pack_color.h" +#include "util/hash_table.h" +#include "util/u_upload_mgr.h" +#include "util/u_prim.h" +#include "util/u_vbuf.h" + +#include "lima_context.h" +#include "lima_screen.h" +#include "lima_resource.h" +#include "lima_program.h" +#include "lima_bo.h" +#include "lima_submit.h" +#include "lima_texture.h" +#include "lima_util.h" +#include "lima_fence.h" + +#include + +struct lima_gp_frame_reg { + uint32_t vs_cmd_start; + uint32_t vs_cmd_end; + uint32_t plbu_cmd_start; + uint32_t plbu_cmd_end; + uint32_t tile_heap_start; + uint32_t tile_heap_end; +}; + +struct lima_pp_frame_reg { + uint32_t plbu_array_address; + uint32_t render_address; + uint32_t unused_0; + uint32_t flags; + uint32_t clear_value_depth; + uint32_t clear_value_stencil; + uint32_t clear_value_color; + uint32_t clear_value_color_1; + uint32_t clear_value_color_2; + uint32_t clear_value_color_3; + uint32_t width; + uint32_t height; + uint32_t fragment_stack_address; + uint32_t fragment_stack_size; + uint32_t unused_1; + uint32_t unused_2; + uint32_t one; + uint32_t supersampled_height; + uint32_t dubya; + uint32_t onscreen; + uint32_t blocking; + uint32_t scale; + uint32_t foureight; +}; + +struct lima_pp_wb_reg { + uint32_t type; + uint32_t address; + uint32_t pixel_format; + uint32_t downsample_factor; + uint32_t pixel_layout; + uint32_t pitch; + uint32_t mrt_bits; + uint32_t mrt_pitch; + uint32_t zero; + uint32_t unused0; + uint32_t unused1; + uint32_t unused2; +}; + +struct lima_render_state { + uint32_t blend_color_bg; + uint32_t blend_color_ra; + uint32_t alpha_blend; + uint32_t depth_test; + uint32_t depth_range; + uint32_t stencil_front; + uint32_t stencil_back; + uint32_t stencil_test; + uint32_t multi_sample; + uint32_t shader_address; + uint32_t varying_types; + uint32_t uniforms_address; + uint32_t textures_address; + uint32_t aux0; + uint32_t aux1; + uint32_t 
varyings_address; +}; + +/* plbu commands */ +#define PLBU_CMD_BEGIN(max) { \ + int i = 0, max_n = max; \ + uint32_t *plbu_cmd = util_dynarray_grow_cap(&ctx->plbu_cmd_array, max_n * 4); + +#define PLBU_CMD_END() \ + assert(i <= max_n); \ + ctx->plbu_cmd_array.size += i * 4; \ +} + +#define PLBU_CMD(v1, v2) \ + do { \ + plbu_cmd[i++] = v1; \ + plbu_cmd[i++] = v2; \ + } while (0) + +#define PLBU_CMD_BLOCK_STEP(shift_min, shift_h, shift_w) \ + PLBU_CMD(((shift_min) << 28) | ((shift_h) << 16) | (shift_w), 0x1000010C) +#define PLBU_CMD_TILED_DIMENSIONS(tiled_w, tiled_h) \ + PLBU_CMD((((tiled_w) - 1) << 24) | (((tiled_h) - 1) << 8), 0x10000109) +#define PLBU_CMD_BLOCK_STRIDE(block_w) PLBU_CMD(block_w, 0x30000000) +#define PLBU_CMD_ARRAY_ADDRESS(gp_stream, block_num) \ + PLBU_CMD(gp_stream, 0x28000000 | ((block_num) - 1) | 1) +#define PLBU_CMD_VIEWPORT_X(v) PLBU_CMD(v, 0x10000107) +#define PLBU_CMD_VIEWPORT_W(v) PLBU_CMD(v, 0x10000108) +#define PLBU_CMD_VIEWPORT_Y(v) PLBU_CMD(v, 0x10000105) +#define PLBU_CMD_VIEWPORT_H(v) PLBU_CMD(v, 0x10000106) +#define PLBU_CMD_ARRAYS_SEMAPHORE_BEGIN() PLBU_CMD(0x00010002, 0x60000000) +#define PLBU_CMD_ARRAYS_SEMAPHORE_END() PLBU_CMD(0x00010001, 0x60000000) +#define PLBU_CMD_PRIMITIVE_SETUP(low_prim, cull, index_size) \ + PLBU_CMD(((low_prim) ? 0x00003200 : 0x00002200) | (cull) | ((index_size) << 9), 0x1000010B) +#define PLBU_CMD_RSW_VERTEX_ARRAY(rsw, gl_pos) \ + PLBU_CMD(rsw, 0x80000000 | ((gl_pos) >> 4)) +#define PLBU_CMD_SCISSORS(minx, maxx, miny, maxy) \ + PLBU_CMD(((minx) << 30) | ((maxy) - 1) << 15 | (miny), \ + 0x70000000 | ((maxx) - 1) << 13 | ((minx) >> 2)) +#define PLBU_CMD_UNKNOWN1() PLBU_CMD(0x00000000, 0x1000010A) +#define PLBU_CMD_UNKNOWN2() PLBU_CMD(0x00000200, 0x1000010B) +#define PLBU_CMD_LOW_PRIM_SIZE(v) PLBU_CMD(v, 0x1000010D) +#define PLBU_CMD_DEPTH_RANGE_NEAR(v) PLBU_CMD(v, 0x1000010E) +#define PLBU_CMD_DEPTH_RANGE_FAR(v) PLBU_CMD(v, 0x1000010F) +#define PLBU_CMD_INDEXED_DEST(gl_pos) PLBU_CMD(gl_pos, 0x10000100) +#define PLBU_CMD_INDICES(va) PLBU_CMD(va, 0x10000101) +#define PLBU_CMD_DRAW_ARRAYS(mode, start, count) \ + PLBU_CMD(((count) << 24) | (start), (((mode) & 0x1F) << 16) | ((count) >> 8)) +#define PLBU_CMD_DRAW_ELEMENTS(mode, start, count) \ + PLBU_CMD(((count) << 24) | (start), \ + 0x00200000 | (((mode) & 0x1F) << 16) | ((count) >> 8)) + +/* vs commands */ +#define VS_CMD_BEGIN(max) { \ + int i = 0, max_n = max; \ + uint32_t *vs_cmd = util_dynarray_grow_cap(&ctx->vs_cmd_array, max_n * 4); + +#define VS_CMD_END() \ + assert(i <= max_n); \ + ctx->vs_cmd_array.size += i * 4; \ +} + +#define VS_CMD(v1, v2) \ + do { \ + vs_cmd[i++] = v1; \ + vs_cmd[i++] = v2; \ + } while (0) + +#define VS_CMD_ARRAYS_SEMAPHORE_BEGIN_1() VS_CMD(0x00028000, 0x50000000) +#define VS_CMD_ARRAYS_SEMAPHORE_BEGIN_2() VS_CMD(0x00000001, 0x50000000) +#define VS_CMD_ARRAYS_SEMAPHORE_END(index_draw) \ + VS_CMD((index_draw) ? 
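A minimal sketch of what the PLBU_CMD/VS_CMD macros above expand to: every command is a pair of 32-bit words (payload, then opcode) appended to a growable array. The emit helper is hypothetical; the opcode constants are copied from the macros.

#include <stdint.h>
#include <stdio.h>

static void emit_cmd(uint32_t *buf, int *i, uint32_t v1, uint32_t v2)
{
   buf[(*i)++] = v1;   /* payload word */
   buf[(*i)++] = v2;   /* opcode word  */
}

int main(void)
{
   uint32_t cmd[8];
   int i = 0;

   emit_cmd(cmd, &i, 0, 0x10000107);   /* PLBU_CMD_VIEWPORT_X(0)   */
   emit_cmd(cmd, &i, 4, 0x30000000);   /* PLBU_CMD_BLOCK_STRIDE(4) */

   for (int j = 0; j < i; j += 2)
      printf("%08x %08x\n", (unsigned)cmd[j], (unsigned)cmd[j + 1]);
   return 0;
}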
0x00018000 : 0x00000000, 0x50000000) +#define VS_CMD_UNIFORMS_ADDRESS(addr, size) \ + VS_CMD(addr, 0x30000000 | ((size) << 12)) +#define VS_CMD_SHADER_ADDRESS(addr, size) \ + VS_CMD(addr, 0x40000000 | ((size) << 12)) +#define VS_CMD_SHADER_INFO(prefetch, size) \ + VS_CMD(((prefetch) << 20) | ((((size) >> 4) - 1) << 10), 0x10000040) +#define VS_CMD_VARYING_ATTRIBUTE_COUNT(nv, na) \ + VS_CMD((((nv) - 1) << 8) | (((na) - 1) << 24), 0x10000042) +#define VS_CMD_UNKNOWN1() VS_CMD(0x00000003, 0x10000041) +#define VS_CMD_UNKNOWN2() VS_CMD(0x00000000, 0x60000000) +#define VS_CMD_ATTRIBUTES_ADDRESS(addr, na) \ + VS_CMD(addr, 0x20000000 | ((na) << 17)) +#define VS_CMD_VARYINGS_ADDRESS(addr, nv) \ + VS_CMD(addr, 0x20000008 | ((nv) << 17)) +#define VS_CMD_DRAW(num, index_draw) \ + VS_CMD(((num) << 24) | ((index_draw) ? 1 : 0), ((num) >> 8)) + +static inline bool +lima_ctx_dirty(struct lima_context *ctx) +{ + return ctx->plbu_cmd_array.size; +} + +static bool +lima_fb_need_reload(struct lima_context *ctx) +{ + if (ctx->damage.region) { + /* for EGL_KHR_partial_update we just want to reload the + * region not aligned to tile boundary */ + if (!ctx->damage.aligned) + return true; + } + else { + struct lima_surface *surf = lima_surface(ctx->framebuffer.cbuf); + if (surf->reload) + return true; + } + + return false; +} + +static void +lima_pack_reload_plbu_cmd(struct lima_context *ctx) +{ + #define lima_reload_render_state_offset 0x0000 + #define lima_reload_gl_pos_offset 0x0040 + #define lima_reload_varying_offset 0x0080 + #define lima_reload_tex_desc_offset 0x00c0 + #define lima_reload_tex_array_offset 0x0100 + #define lima_reload_buffer_size 0x0140 + + void *cpu; + unsigned offset; + struct pipe_resource *pres = NULL; + u_upload_alloc(ctx->uploader, 0, lima_reload_buffer_size, + 0x40, &offset, &pres, &cpu); + + struct lima_resource *res = lima_resource(pres); + uint32_t va = res->bo->va + offset; + + struct lima_screen *screen = lima_screen(ctx->base.screen); + + uint32_t reload_shader_first_instr_size = + ((uint32_t *)(screen->pp_buffer->map + pp_reload_program_offset))[0] & 0x1f; + uint32_t reload_shader_va = screen->pp_buffer->va + pp_reload_program_offset; + + struct lima_render_state reload_render_state = { + .alpha_blend = 0xf03b1ad2, + .depth_test = 0x0000000e, + .depth_range = 0xffff0000, + .stencil_front = 0x00000007, + .stencil_back = 0x00000007, + .multi_sample = 0x0000f007, + .shader_address = reload_shader_va | reload_shader_first_instr_size, + .varying_types = 0x00000001, + .textures_address = va + lima_reload_tex_array_offset, + .aux0 = 0x00004021, + .varyings_address = va + lima_reload_varying_offset, + }; + memcpy(cpu + lima_reload_render_state_offset, &reload_render_state, + sizeof(reload_render_state)); + + struct lima_context_framebuffer *fb = &ctx->framebuffer; + uint32_t *td = cpu + lima_reload_tex_desc_offset; + memset(td, 0, lima_tex_desc_size); + lima_texture_desc_set_res(ctx, td, fb->cbuf->texture, 0, 0); + td[1] = 0x00000480; + td[2] |= 0x00093800; + td[4] = 0x00000000; + td[5] = 0x00000000; + + uint32_t *ta = cpu + lima_reload_tex_array_offset; + ta[0] = va + lima_reload_tex_desc_offset; + + float reload_gl_pos[] = { + fb->width, 0, 0, 1, + 0, 0, 0, 1, + 0, fb->height, 0, 1, + }; + memcpy(cpu + lima_reload_gl_pos_offset, reload_gl_pos, + sizeof(reload_gl_pos)); + + float reload_varying[] = { + fb->width, 0, 0, 0, + 0, fb->height, 0, 0, + }; + memcpy(cpu + lima_reload_varying_offset, reload_varying, + sizeof(reload_varying)); + + lima_submit_add_bo(ctx->pp_submit, res->bo, 
LIMA_SUBMIT_BO_READ); + pipe_resource_reference(&pres, NULL); + + PLBU_CMD_BEGIN(20); + + PLBU_CMD_VIEWPORT_X(0); + PLBU_CMD_VIEWPORT_W(fui(fb->width)); + PLBU_CMD_VIEWPORT_Y(0); + PLBU_CMD_VIEWPORT_H(fui(fb->height)); + + PLBU_CMD_RSW_VERTEX_ARRAY( + va + lima_reload_render_state_offset, + va + lima_reload_gl_pos_offset); + + PLBU_CMD_UNKNOWN2(); + PLBU_CMD_UNKNOWN1(); + + PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset); + PLBU_CMD_INDEXED_DEST(va + lima_reload_gl_pos_offset); + PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3); + + PLBU_CMD_END(); +} + +static void +lima_pack_clear_plbu_cmd(struct lima_context *ctx) +{ + #define lima_clear_render_state_offset 0x0000 + #define lima_clear_shader_offset 0x0040 + #define lima_clear_buffer_size 0x0080 + + void *cpu; + unsigned offset; + struct pipe_resource *pres = NULL; + u_upload_alloc(ctx->uploader, 0, lima_clear_buffer_size, + 0x40, &offset, &pres, &cpu); + + struct lima_resource *res = lima_resource(pres); + uint32_t va = res->bo->va + offset; + + struct lima_screen *screen = lima_screen(ctx->base.screen); + uint32_t gl_pos_va = screen->pp_buffer->va + pp_clear_gl_pos_offset; + + /* const0 clear_color, mov.v1 $0 ^const0.xxxx, stop */ + uint32_t clear_shader[] = { + 0x00021025, 0x0000000c, + (ctx->clear.color_16pc << 12) | 0x000007cf, + ctx->clear.color_16pc >> 12, + ctx->clear.color_16pc >> 44, + }; + memcpy(cpu + lima_clear_shader_offset, &clear_shader, + sizeof(clear_shader)); + + uint32_t clear_shader_va = va + lima_clear_shader_offset; + uint32_t clear_shader_first_instr_size = clear_shader[0] & 0x1f; + + struct lima_render_state clear_render_state = { + .blend_color_bg = 0x00800080, + .blend_color_ra = 0x00ff0080, + .alpha_blend = 0xfc321892, + .depth_test = 0x0000003e, + .depth_range = 0xffff0000, + .stencil_front = 0x00000007, + .stencil_back = 0x00000007, + .multi_sample = 0x0000f007, + .shader_address = clear_shader_va | clear_shader_first_instr_size, + }; + memcpy(cpu + lima_clear_render_state_offset, &clear_render_state, + sizeof(clear_render_state)); + + PLBU_CMD_BEGIN(22); + + PLBU_CMD_VIEWPORT_X(0); + PLBU_CMD_VIEWPORT_W(0x45800000); + PLBU_CMD_VIEWPORT_Y(0); + PLBU_CMD_VIEWPORT_H(0x45800000); + + struct pipe_scissor_state *scissor = &ctx->scissor; + PLBU_CMD_SCISSORS(scissor->minx, scissor->maxx, scissor->miny, scissor->maxy); + + PLBU_CMD_RSW_VERTEX_ARRAY(va + lima_clear_render_state_offset, gl_pos_va); + + PLBU_CMD_UNKNOWN2(); + PLBU_CMD_UNKNOWN1(); + + PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset); + PLBU_CMD_INDEXED_DEST(gl_pos_va); + PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3); + + PLBU_CMD_END(); +} + +static void +lima_pack_head_plbu_cmd(struct lima_context *ctx) +{ + /* first draw need create a PLBU command header */ + if (lima_ctx_dirty(ctx)) + return; + + struct lima_context_framebuffer *fb = &ctx->framebuffer; + + PLBU_CMD_BEGIN(10); + + PLBU_CMD_UNKNOWN2(); + PLBU_CMD_BLOCK_STEP(fb->shift_min, fb->shift_h, fb->shift_w); + PLBU_CMD_TILED_DIMENSIONS(fb->tiled_w, fb->tiled_h); + PLBU_CMD_BLOCK_STRIDE(fb->block_w); + + PLBU_CMD_ARRAY_ADDRESS( + ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size, + fb->block_w * fb->block_h); + + PLBU_CMD_END(); + + if (lima_fb_need_reload(ctx)) + lima_pack_reload_plbu_cmd(ctx); +} + +static bool +lima_is_scissor_zero(struct lima_context *ctx) +{ + if (!ctx->rasterizer || !ctx->rasterizer->base.scissor) + return false; + + struct pipe_scissor_state *scissor = &ctx->scissor; + return + scissor->minx == scissor->maxx + && scissor->miny == scissor->maxy; +} + +static 
bool +lima_is_scissor_full_fb(struct lima_context *ctx) +{ + if (!ctx->rasterizer || !ctx->rasterizer->base.scissor) + return true; + + struct pipe_scissor_state *scissor = &ctx->scissor; + struct lima_context_framebuffer *fb = &ctx->framebuffer; + return + scissor->minx == 0 && scissor->maxx == fb->width && + scissor->miny == 0 && scissor->maxy == fb->height; +} + +static void +hilbert_rotate(int n, int *x, int *y, int rx, int ry) +{ + if (ry == 0) { + if (rx == 1) { + *x = n-1 - *x; + *y = n-1 - *y; + } + + /* Swap x and y */ + int t = *x; + *x = *y; + *y = t; + } +} + +static void +hilbert_coords(int n, int d, int *x, int *y) +{ + int rx, ry, i, t=d; + + *x = *y = 0; + + for (i = 0; (1 << i) < n; i++) { + + rx = 1 & (t / 2); + ry = 1 & (t ^ rx); + + hilbert_rotate(1 << i, x, y, rx, ry); + + *x += rx << i; + *y += ry << i; + + t /= 4; + } +} + +static int +lima_get_pp_stream_size(int num_pp, int tiled_w, int tiled_h, uint32_t *off) +{ + /* carefully calculate each stream start address: + * 1. overflow: each stream size may be different due to + * fb->tiled_w * fb->tiled_h can't be divided by num_pp, + * extra size should be added to the preceeding stream + * 2. alignment: each stream address should be 0x20 aligned + */ + int delta = tiled_w * tiled_h / num_pp * 16 + 8; + int remain = tiled_w * tiled_h % num_pp; + int offset = 0; + + for (int i = 0; i < num_pp; i++) { + off[i] = offset; + + offset += delta; + if (remain) { + offset += 16; + remain--; + } + offset = align(offset, 0x20); + } + + return offset; +} + +static bool +inside_damage_region(int x, int y, struct lima_damage_state *ds) +{ + if (!ds->region) + return true; + + for (int i = 0; i < ds->num_region; i++) { + struct pipe_scissor_state *ss = ds->region + i; + if (x >= ss->minx && x < ss->maxx && + y >= ss->miny && y < ss->maxy) + return true; + } + + return false; +} + +static void +lima_update_pp_stream(struct lima_context *ctx, int off_x, int off_y, + int tiled_w, int tiled_h) +{ + struct lima_pp_stream_state *ps = &ctx->pp_stream; + struct lima_context_framebuffer *fb = &ctx->framebuffer; + struct lima_screen *screen = lima_screen(ctx->base.screen); + int i, num_pp = screen->num_pp; + + /* use hilbert_coords to generates 1D to 2D relationship. + * 1D for pp stream index and 2D for plb block x/y on framebuffer. 
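A standalone version of lima_get_pp_stream_size above, showing how the tiles are split across the PP streams: the leftover tiles go to the leading streams and every stream start is aligned to 0x20 (ALIGN and pp_stream_size are local stand-ins).

#include <stdio.h>

#define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

static int pp_stream_size(int num_pp, int tiled_w, int tiled_h,
                          unsigned *off)
{
   int delta = tiled_w * tiled_h / num_pp * 16 + 8;
   int remain = tiled_w * tiled_h % num_pp;
   int offset = 0;

   for (int i = 0; i < num_pp; i++) {
      off[i] = offset;
      offset += delta;
      if (remain) {            /* leading streams take one extra tile record */
         offset += 16;
         remain--;
      }
      offset = ALIGN(offset, 0x20);
   }
   return offset;
}

int main(void)
{
   unsigned off[4];
   int size = pp_stream_size(4, 10, 7, off);   /* 70 tiles over 4 PPs */

   printf("total %d bytes, starts %u %u %u %u\n",
          size, off[0], off[1], off[2], off[3]);
   return 0;
}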
+ * if multi pp, interleave the 1D index to make each pp's render target + * close enough which should result close workload + */ + int max = MAX2(tiled_w, tiled_h); + int dim = util_logbase2_ceil(max); + int count = 1 << (dim + dim); + int index = 0; + uint32_t *stream[4]; + int si[4] = {0}; + + for (i = 0; i < num_pp; i++) + stream[i] = ps->bo->map + ps->bo_offset + ps->offset[i]; + + for (i = 0; i < count; i++) { + int x, y; + hilbert_coords(max, i, &x, &y); + if (x < tiled_w && y < tiled_h) { + x += off_x; + y += off_y; + + if (!inside_damage_region(x, y, &ctx->damage)) + continue; + + int pp = index % num_pp; + int offset = ((y >> fb->shift_h) * fb->block_w + + (x >> fb->shift_w)) * LIMA_CTX_PLB_BLK_SIZE; + int plb_va = ctx->plb[ctx->plb_index]->va + offset; + + stream[pp][si[pp]++] = 0; + stream[pp][si[pp]++] = 0xB8000000 | x | (y << 8); + stream[pp][si[pp]++] = 0xE0000002 | ((plb_va >> 3) & ~0xE0000003); + stream[pp][si[pp]++] = 0xB0000000; + + index++; + } + } + + for (i = 0; i < num_pp; i++) { + stream[i][si[i]++] = 0; + stream[i][si[i]++] = 0xBC000000; + + lima_dump_command_stream_print( + stream[i], si[i] * 4, false, "pp plb stream %d at va %x\n", + i, ps->bo->va + ps->bo_offset + ps->offset[i]); + } +} + +static void +lima_update_damage_pp_stream(struct lima_context *ctx) +{ + struct lima_damage_state *ds = &ctx->damage; + struct pipe_scissor_state max = ds->region[0]; + + /* find a max region to cover all the damage region */ + for (int i = 1; i < ds->num_region; i++) { + struct pipe_scissor_state *ss = ds->region + i; + max.minx = MIN2(max.minx, ss->minx); + max.miny = MIN2(max.miny, ss->miny); + max.maxx = MAX2(max.maxx, ss->maxx); + max.maxy = MAX2(max.maxy, ss->maxy); + } + + int tiled_w = max.maxx - max.minx; + int tiled_h = max.maxy - max.miny; + struct lima_screen *screen = lima_screen(ctx->base.screen); + int size = lima_get_pp_stream_size( + screen->num_pp, tiled_w, tiled_h, ctx->pp_stream.offset); + + void *cpu; + unsigned offset; + struct pipe_resource *pres = NULL; + u_upload_alloc(ctx->uploader, 0, size, 0x40, &offset, &pres, &cpu); + + struct lima_resource *res = lima_resource(pres); + ctx->pp_stream.bo = res->bo; + ctx->pp_stream.bo_offset = offset; + + lima_update_pp_stream(ctx, max.minx, max.miny, tiled_w, tiled_h); + + lima_submit_add_bo(ctx->pp_submit, res->bo, LIMA_SUBMIT_BO_READ); + pipe_resource_reference(&pres, NULL); +} + +static void +lima_update_full_pp_stream(struct lima_context *ctx) +{ + struct lima_context_framebuffer *fb = &ctx->framebuffer; + struct lima_ctx_plb_pp_stream_key key = { + .plb_index = ctx->plb_index, + .tiled_w = fb->tiled_w, + .tiled_h = fb->tiled_h, + }; + + struct hash_entry *entry = + _mesa_hash_table_search(ctx->plb_pp_stream, &key); + struct lima_ctx_plb_pp_stream *s = entry->data; + + if (s->bo) { + ctx->pp_stream.bo = s->bo; + ctx->pp_stream.bo_offset = 0; + memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset)); + } + else { + struct lima_screen *screen = lima_screen(ctx->base.screen); + int size = lima_get_pp_stream_size( + screen->num_pp, fb->tiled_w, fb->tiled_h, s->offset); + s->bo = lima_bo_create(screen, size, 0); + lima_bo_map(s->bo); + + ctx->pp_stream.bo = s->bo; + ctx->pp_stream.bo_offset = 0; + memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset)); + + lima_update_pp_stream(ctx, 0, 0, fb->tiled_w, fb->tiled_h); + } + + lima_submit_add_bo(ctx->pp_submit, s->bo, LIMA_SUBMIT_BO_READ); +} + +static void +lima_update_submit_bo(struct lima_context *ctx) +{ + if (lima_ctx_dirty(ctx)) + return; + + struct 
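A standalone demo of the Hilbert-curve walk used above (same algorithm as hilbert_rotate/hilbert_coords): consecutive curve indices map to neighbouring tiles, so interleaving them across the PPs keeps each PP's share of the framebuffer spatially compact.

#include <stdio.h>

static void rot(int n, int *x, int *y, int rx, int ry)
{
   if (ry == 0) {
      if (rx == 1) {
         *x = n - 1 - *x;
         *y = n - 1 - *y;
      }
      int t = *x; *x = *y; *y = t;   /* swap x and y */
   }
}

static void d2xy(int n, int d, int *x, int *y)
{
   int rx, ry, t = d;

   *x = *y = 0;
   for (int i = 0; (1 << i) < n; i++) {
      rx = 1 & (t / 2);
      ry = 1 & (t ^ rx);
      rot(1 << i, x, y, rx, ry);
      *x += rx << i;
      *y += ry << i;
      t /= 4;
   }
}

int main(void)
{
   for (int d = 0; d < 16; d++) {   /* walk a 4x4 tile grid in curve order */
      int x, y;
      d2xy(4, d, &x, &y);
      printf("%2d -> (%d,%d)\n", d, x, y);
   }
   return 0;
}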
lima_screen *screen = lima_screen(ctx->base.screen); + lima_submit_add_bo(ctx->gp_submit, ctx->plb_gp_stream, LIMA_SUBMIT_BO_READ); + lima_submit_add_bo(ctx->gp_submit, ctx->plb[ctx->plb_index], LIMA_SUBMIT_BO_WRITE); + lima_submit_add_bo(ctx->gp_submit, screen->gp_buffer, LIMA_SUBMIT_BO_READ); + + lima_dump_command_stream_print( + ctx->plb_gp_stream->map + ctx->plb_index * ctx->plb_gp_size, + ctx->plb_gp_size, false, "gp plb stream at va %x\n", + ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size); + + if (ctx->damage.region) + lima_update_damage_pp_stream(ctx); + else if (ctx->plb_pp_stream) + lima_update_full_pp_stream(ctx); + else + ctx->pp_stream.bo = NULL; + + struct lima_resource *res = lima_resource(ctx->framebuffer.cbuf->texture); + lima_submit_add_bo(ctx->pp_submit, res->bo, LIMA_SUBMIT_BO_WRITE); + lima_submit_add_bo(ctx->pp_submit, ctx->plb[ctx->plb_index], LIMA_SUBMIT_BO_READ); + lima_submit_add_bo(ctx->pp_submit, screen->pp_buffer, LIMA_SUBMIT_BO_READ); +} + +static void +lima_clear(struct pipe_context *pctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct lima_context *ctx = lima_context(pctx); + bool full_fb_clear = lima_is_scissor_full_fb(ctx); + + if (full_fb_clear) { + lima_flush(ctx); + + /* no need to reload if cleared */ + if (buffers & PIPE_CLEAR_COLOR0) { + struct lima_surface *surf = lima_surface(ctx->framebuffer.cbuf); + surf->reload = false; + } + } + + struct lima_context_clear *clear = &ctx->clear; + clear->buffers = buffers; + + if (buffers & PIPE_CLEAR_COLOR0) { + clear->color_8pc = + ((uint32_t)float_to_ubyte(color->f[3]) << 24) | + ((uint32_t)float_to_ubyte(color->f[2]) << 16) | + ((uint32_t)float_to_ubyte(color->f[1]) << 8) | + float_to_ubyte(color->f[0]); + + clear->color_16pc = + ((uint64_t)float_to_ushort(color->f[3]) << 48) | + ((uint64_t)float_to_ushort(color->f[2]) << 32) | + ((uint64_t)float_to_ushort(color->f[1]) << 16) | + float_to_ushort(color->f[0]); + } + + if (buffers & PIPE_CLEAR_DEPTH) + clear->depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth); + + if (buffers & PIPE_CLEAR_STENCIL) + clear->stencil = stencil; + + lima_update_submit_bo(ctx); + + lima_pack_head_plbu_cmd(ctx); + + /* partial clear */ + if (!full_fb_clear) + lima_pack_clear_plbu_cmd(ctx); + + ctx->dirty |= LIMA_CONTEXT_DIRTY_CLEAR; +} + +enum lima_attrib_type { + LIMA_ATTRIB_FLOAT = 0x000, + /* todo: find out what lives here. */ + LIMA_ATTRIB_I16 = 0x004, + LIMA_ATTRIB_U16 = 0x005, + LIMA_ATTRIB_I8 = 0x006, + LIMA_ATTRIB_U8 = 0x007, + LIMA_ATTRIB_I8N = 0x008, + LIMA_ATTRIB_U8N = 0x009, + LIMA_ATTRIB_I16N = 0x00A, + LIMA_ATTRIB_U16N = 0x00B, + /* todo: where is the 32 int */ + /* todo: find out what lives here. 
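A sketch of the clear-color packing done in lima_clear above: the same RGBA value is stored both as 8 bits per channel in one 32-bit word and as 16 bits per channel in one 64-bit word, using a round-to-nearest conversion similar to float_to_ubyte/float_to_ushort (to_unorm is a local stand-in).

#include <stdint.h>
#include <stdio.h>

static uint32_t to_unorm(float f, uint32_t max)
{
   if (f <= 0.0f)
      return 0;
   if (f >= 1.0f)
      return max;
   return (uint32_t)(f * (float)max + 0.5f);
}

int main(void)
{
   float rgba[4] = { 1.0f, 0.5f, 0.25f, 1.0f };   /* made-up clear color */

   uint32_t c8 = (to_unorm(rgba[3], 255) << 24) |
                 (to_unorm(rgba[2], 255) << 16) |
                 (to_unorm(rgba[1], 255) << 8) |
                  to_unorm(rgba[0], 255);

   uint64_t c16 = ((uint64_t)to_unorm(rgba[3], 65535) << 48) |
                  ((uint64_t)to_unorm(rgba[2], 65535) << 32) |
                  ((uint64_t)to_unorm(rgba[1], 65535) << 16) |
                    to_unorm(rgba[0], 65535);

   printf("color_8pc  0x%08x\ncolor_16pc 0x%016llx\n",
          (unsigned)c8, (unsigned long long)c16);
   return 0;
}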
*/ + LIMA_ATTRIB_FIXED = 0x101 +}; + +static enum lima_attrib_type +lima_pipe_format_to_attrib_type(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + int i = util_format_get_first_non_void_channel(format); + const struct util_format_channel_description *c = desc->channel + i; + + switch (c->type) { + case UTIL_FORMAT_TYPE_FLOAT: + return LIMA_ATTRIB_FLOAT; + case UTIL_FORMAT_TYPE_FIXED: + return LIMA_ATTRIB_FIXED; + case UTIL_FORMAT_TYPE_SIGNED: + if (c->size == 8) { + if (c->normalized) + return LIMA_ATTRIB_I8N; + else + return LIMA_ATTRIB_I8; + } + else if (c->size == 16) { + if (c->normalized) + return LIMA_ATTRIB_I16N; + else + return LIMA_ATTRIB_I16; + } + break; + case UTIL_FORMAT_TYPE_UNSIGNED: + if (c->size == 8) { + if (c->normalized) + return LIMA_ATTRIB_U8N; + else + return LIMA_ATTRIB_U8; + } + else if (c->size == 16) { + if (c->normalized) + return LIMA_ATTRIB_U16N; + else + return LIMA_ATTRIB_U16; + } + break; + } + + return LIMA_ATTRIB_FLOAT; +} + +static void +lima_pack_vs_cmd(struct lima_context *ctx, const struct pipe_draw_info *info) +{ + VS_CMD_BEGIN(24); + + if (!info->index_size) { + VS_CMD_ARRAYS_SEMAPHORE_BEGIN_1(); + VS_CMD_ARRAYS_SEMAPHORE_BEGIN_2(); + } + + int uniform_size = ctx->vs->uniform_pending_offset + ctx->vs->constant_size + 32; + VS_CMD_UNIFORMS_ADDRESS( + lima_ctx_buff_va(ctx, lima_ctx_buff_gp_uniform, LIMA_CTX_BUFF_SUBMIT_GP), + align(uniform_size, 16)); + + VS_CMD_SHADER_ADDRESS(ctx->vs->bo->va, ctx->vs->shader_size); + VS_CMD_SHADER_INFO(ctx->vs->prefetch, ctx->vs->shader_size); + + int num_varryings = ctx->vs->num_varying; + int num_attributes = ctx->vertex_elements->num_elements; + VS_CMD_VARYING_ATTRIBUTE_COUNT(num_varryings, num_attributes); + + VS_CMD_UNKNOWN1(); + + VS_CMD_ATTRIBUTES_ADDRESS( + lima_ctx_buff_va(ctx, lima_ctx_buff_gp_attribute_info, LIMA_CTX_BUFF_SUBMIT_GP), + num_attributes); + + VS_CMD_VARYINGS_ADDRESS( + lima_ctx_buff_va(ctx, lima_ctx_buff_gp_varying_info, LIMA_CTX_BUFF_SUBMIT_GP), + num_varryings); + + unsigned num = info->index_size ? (ctx->max_index - ctx->min_index + 1) : info->count; + VS_CMD_DRAW(num, info->index_size); + + VS_CMD_UNKNOWN2(); + + VS_CMD_ARRAYS_SEMAPHORE_END(info->index_size); + + VS_CMD_END(); +} + +static void +lima_pack_plbu_cmd(struct lima_context *ctx, const struct pipe_draw_info *info) +{ + lima_pack_head_plbu_cmd(ctx); + + /* If it's zero scissor, we skip adding all other commands */ + if (lima_is_scissor_zero(ctx)) + return; + + PLBU_CMD_BEGIN(30); + + PLBU_CMD_VIEWPORT_X(fui(ctx->viewport.x)); + PLBU_CMD_VIEWPORT_W(fui(ctx->viewport.width)); + PLBU_CMD_VIEWPORT_Y(fui(ctx->viewport.y)); + PLBU_CMD_VIEWPORT_H(fui(ctx->viewport.height)); + + if (!info->index_size) + PLBU_CMD_ARRAYS_SEMAPHORE_BEGIN(); + + bool low_prim = info->mode < PIPE_PRIM_TRIANGLES; + int cf = ctx->rasterizer->base.cull_face; + int ccw = ctx->rasterizer->base.front_ccw; + uint32_t cull = 0; + if (cf != PIPE_FACE_NONE) { + if (cf & PIPE_FACE_FRONT) + cull |= ccw ? 0x00040000 : 0x00020000; + if (cf & PIPE_FACE_BACK) + cull |= ccw ? 
0x00020000 : 0x00040000; + } + PLBU_CMD_PRIMITIVE_SETUP(low_prim, cull, info->index_size); + + uint32_t gl_position_va = + lima_ctx_buff_va(ctx, lima_ctx_buff_sh_gl_pos, + LIMA_CTX_BUFF_SUBMIT_GP | LIMA_CTX_BUFF_SUBMIT_PP); + PLBU_CMD_RSW_VERTEX_ARRAY( + lima_ctx_buff_va(ctx, lima_ctx_buff_pp_plb_rsw, LIMA_CTX_BUFF_SUBMIT_PP), + gl_position_va); + + /* TODO + * - we should set it only for the first draw that enabled the scissor and for + * latter draw only if scissor is dirty + */ + if (ctx->rasterizer->base.scissor) { + struct pipe_scissor_state *scissor = &ctx->scissor; + PLBU_CMD_SCISSORS(scissor->minx, scissor->maxx, scissor->miny, scissor->maxy); + } + + PLBU_CMD_UNKNOWN1(); + + PLBU_CMD_DEPTH_RANGE_NEAR(fui(ctx->viewport.near)); + PLBU_CMD_DEPTH_RANGE_FAR(fui(ctx->viewport.far)); + + if (low_prim) { + uint32_t v = info->mode == PIPE_PRIM_POINTS ? + fui(ctx->rasterizer->base.point_size) : fui(ctx->rasterizer->base.line_width); + PLBU_CMD_LOW_PRIM_SIZE(v); + } + + if (info->index_size) { + PLBU_CMD_INDEXED_DEST(gl_position_va); + + struct pipe_resource *indexbuf = NULL; + unsigned index_offset = 0; + struct lima_resource *res; + if (info->has_user_indices) { + util_upload_index_buffer(&ctx->base, info, &indexbuf, &index_offset); + res = lima_resource(indexbuf); + } + else + res = lima_resource(info->index.resource); + + lima_submit_add_bo(ctx->gp_submit, res->bo, LIMA_SUBMIT_BO_READ); + PLBU_CMD_INDICES(res->bo->va + info->start * info->index_size + index_offset); + + if (indexbuf) + pipe_resource_reference(&indexbuf, NULL); + } + else { + /* can this make the attribute info static? */ + PLBU_CMD_DRAW_ARRAYS(info->mode, info->start, info->count); + } + + PLBU_CMD_ARRAYS_SEMAPHORE_END(); + + if (info->index_size) + PLBU_CMD_DRAW_ELEMENTS(info->mode, ctx->min_index, info->count); + + PLBU_CMD_END(); +} + +static int +lima_blend_func(enum pipe_blend_func pipe) +{ + switch (pipe) { + case PIPE_BLEND_ADD: + return 2; + case PIPE_BLEND_SUBTRACT: + return 0; + case PIPE_BLEND_REVERSE_SUBTRACT: + return 1; + case PIPE_BLEND_MIN: + return 4; + case PIPE_BLEND_MAX: + return 5; + } + return -1; +} + +static int +lima_blend_factor(enum pipe_blendfactor pipe) +{ + switch (pipe) { + case PIPE_BLENDFACTOR_ONE: + return 11; + case PIPE_BLENDFACTOR_SRC_COLOR: + return 0; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return 16; + case PIPE_BLENDFACTOR_DST_ALPHA: + return 17; + case PIPE_BLENDFACTOR_DST_COLOR: + return 1; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return 7; + case PIPE_BLENDFACTOR_CONST_COLOR: + return 2; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return 18; + case PIPE_BLENDFACTOR_ZERO: + return 3; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return 8; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return 24; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return 25; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return 9; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return 10; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return 26; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return -1; /* not support */ + } + return -1; +} + +static int +lima_calculate_alpha_blend(enum pipe_blend_func rgb_func, enum pipe_blend_func alpha_func, + enum pipe_blendfactor rgb_src_factor, enum pipe_blendfactor rgb_dst_factor, + enum pipe_blendfactor alpha_src_factor, enum pipe_blendfactor alpha_dst_factor) +{ + return lima_blend_func(rgb_func) | + (lima_blend_func(alpha_func) << 3) | + (lima_blend_factor(rgb_src_factor) << 6) | + 
(lima_blend_factor(rgb_dst_factor) << 11) | + ((lima_blend_factor(alpha_src_factor) & 0xF) << 16) | + ((lima_blend_factor(alpha_dst_factor) & 0xF) << 20) | + 0x0C000000; /* need check if this GLESv1 glAlphaFunc */ +} + +static int +lima_stencil_op(enum pipe_stencil_op pipe) +{ + switch (pipe) { + case PIPE_STENCIL_OP_KEEP: + return 0; + case PIPE_STENCIL_OP_ZERO: + return 2; + case PIPE_STENCIL_OP_REPLACE: + return 1; + case PIPE_STENCIL_OP_INCR: + return 6; + case PIPE_STENCIL_OP_DECR: + return 7; + case PIPE_STENCIL_OP_INCR_WRAP: + return 4; + case PIPE_STENCIL_OP_DECR_WRAP: + return 5; + case PIPE_STENCIL_OP_INVERT: + return 3; + } + return -1; +} + +static int +lima_calculate_depth_test(struct pipe_depth_state *depth, struct pipe_rasterizer_state *rst) +{ + enum pipe_compare_func func = (depth->enabled ? depth->func : PIPE_FUNC_ALWAYS); + + int offset_scale = 0; + + //TODO: implement polygon offset +#if 0 + if (rst->offset_scale < -32) + offset_scale = -32; + else if (rst->offset_scale > 31) + offset_scale = 31; + else + offset_scale = rst->offset_scale * 4; + + if (offset_scale < 0) + offset_scale = 0x100 + offset_scale; +#endif + + return (depth->enabled && depth->writemask) | + ((int)func << 1) | + (offset_scale << 16) | + 0x30; /* find out what is this */ +} + +static void +lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *info) +{ + struct lima_render_state *render = + lima_ctx_buff_alloc(ctx, lima_ctx_buff_pp_plb_rsw, + sizeof(*render), true); + + /* do hw support RGBA independ blend? + * PIPE_CAP_INDEP_BLEND_ENABLE + * + * how to handle the no cbuf only zbuf case? + */ + struct pipe_rt_blend_state *rt = ctx->blend->base.rt; + render->blend_color_bg = float_to_ubyte(ctx->blend_color.color[2]) | + (float_to_ubyte(ctx->blend_color.color[1]) << 16); + render->blend_color_ra = float_to_ubyte(ctx->blend_color.color[0]) | + (float_to_ubyte(ctx->blend_color.color[3]) << 16); + + if (rt->blend_enable) { + render->alpha_blend = lima_calculate_alpha_blend(rt->rgb_func, rt->alpha_func, + rt->rgb_src_factor, rt->rgb_dst_factor, + rt->alpha_src_factor, rt->alpha_dst_factor); + } + else { + /* + * Special handling for blending disabled. + * Binary driver is generating the same alpha_value, + * as when we would just enable blending, without changing/setting any blend equation/params. + * Normaly in this case mesa would set all rt fields (func/factor) to zero. + */ + render->alpha_blend = lima_calculate_alpha_blend(PIPE_BLEND_ADD, PIPE_BLEND_ADD, + PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO, + PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO); + } + + render->alpha_blend |= (rt->colormask & PIPE_MASK_RGBA) << 28; + + struct pipe_rasterizer_state *rst = &ctx->rasterizer->base; + struct pipe_depth_state *depth = &ctx->zsa->base.depth; + render->depth_test = lima_calculate_depth_test(depth, rst); + + /* overlap with plbu? any place can remove one? 
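The same viewport near/far pair is also programmed through PLBU_CMD_DEPTH_RANGE_NEAR/FAR in lima_pack_plbu_cmd(), so one of the two writes may turn out to be redundant.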
*/ + render->depth_range = float_to_ushort(ctx->viewport.near) | + (float_to_ushort(ctx->viewport.far) << 16); + +#if 0 + struct pipe_stencil_state *stencil = ctx->zsa->base.stencil; + struct pipe_stencil_ref *ref = &ctx->stencil_ref; + render->stencil_front = stencil[0].func | + (lima_stencil_op(stencil[0].fail_op) << 3) | + (lima_stencil_op(stencil[0].zfail_op) << 6) | + (lima_stencil_op(stencil[0].zpass_op) << 9) | + (ref->ref_value[0] << 16) | + (stencil[0].valuemask << 24); + render->stencil_back = stencil[1].func | + (lima_stencil_op(stencil[1].fail_op) << 3) | + (lima_stencil_op(stencil[1].zfail_op) << 6) | + (lima_stencil_op(stencil[1].zpass_op) << 9) | + (ref->ref_value[1] << 16) | + (stencil[1].valuemask << 24); +#else + render->stencil_front = 0xff000007; + render->stencil_back = 0xff000007; +#endif + + /* seems not correct? */ + //struct pipe_alpha_state *alpha = &ctx->zsa->base.alpha; + render->stencil_test = 0; + //(stencil->enabled ? 0xFF : 0x00) | (float_to_ubyte(alpha->ref_value) << 16) + + /* need more investigation */ + if (info->mode == PIPE_PRIM_POINTS) + render->multi_sample = 0x0000F007; + else if (info->mode < PIPE_PRIM_TRIANGLES) + render->multi_sample = 0x0000F407; + else + render->multi_sample = 0x0000F807; + if (ctx->framebuffer.samples) + render->multi_sample |= 0x68; + + render->shader_address = + ctx->fs->bo->va | (((uint32_t *)ctx->fs->bo->map)[0] & 0x1F); + + /* seems not needed */ + render->uniforms_address = 0x00000000; + + render->textures_address = 0x00000000; + + /* more investigation */ + render->aux0 = 0x00000300 | (ctx->vs->varying_stride >> 3); + render->aux1 = 0x00003000; + + if (ctx->tex_stateobj.num_samplers) { + render->textures_address = + lima_ctx_buff_va(ctx, lima_ctx_buff_pp_tex_desc, LIMA_CTX_BUFF_SUBMIT_PP); + render->aux0 |= ctx->tex_stateobj.num_samplers << 14; + render->aux0 |= 0x20; + } + + if (ctx->const_buffer[PIPE_SHADER_FRAGMENT].buffer) { + render->uniforms_address = + lima_ctx_buff_va(ctx, lima_ctx_buff_pp_uniform_array, LIMA_CTX_BUFF_SUBMIT_PP); + render->uniforms_address |= ((ctx->buffer_state[lima_ctx_buff_pp_uniform].size) / 4 - 1); + render->aux0 |= 0x80; + render->aux1 |= 0x10000; + } + + if (ctx->vs->num_varying > 1) { + render->varying_types = 0x00000000; + render->varyings_address = + lima_ctx_buff_va(ctx, lima_ctx_buff_sh_varying, LIMA_CTX_BUFF_SUBMIT_PP); + for (int i = 1; i < ctx->vs->num_varying; i++) { + int val; + + struct lima_varying_info *v = ctx->vs->varying + i; + if (v->component_size == 4) + val = v->components > 2 ? 0 : 1; + else + val = v->components > 2 ? 
2 : 3; + + int index = i - 1; + if (index < 10) + render->varying_types |= val << (3 * index); + else if (index == 10) { + render->varying_types |= val << 30; + render->varyings_address |= val >> 2; + } + else if (index == 11) + render->varyings_address |= val << 1; + } + } + else { + render->varying_types = 0x00000000; + render->varyings_address = 0x00000000; + } + + lima_dump_command_stream_print( + render, sizeof(*render), false, "add render state at va %x\n", + lima_ctx_buff_va(ctx, lima_ctx_buff_pp_plb_rsw, 0)); +} + +static void +lima_update_gp_attribute_info(struct lima_context *ctx, const struct pipe_draw_info *info) +{ + struct lima_vertex_element_state *ve = ctx->vertex_elements; + struct lima_context_vertex_buffer *vb = &ctx->vertex_buffers; + + uint32_t *attribute = + lima_ctx_buff_alloc(ctx, lima_ctx_buff_gp_attribute_info, + ve->num_elements * 8, true); + + int n = 0; + for (int i = 0; i < ve->num_elements; i++) { + struct pipe_vertex_element *pve = ve->pipe + i; + + assert(pve->vertex_buffer_index < vb->count); + assert(vb->enabled_mask & (1 << pve->vertex_buffer_index)); + + struct pipe_vertex_buffer *pvb = vb->vb + pve->vertex_buffer_index; + struct lima_resource *res = lima_resource(pvb->buffer.resource); + + lima_submit_add_bo(ctx->gp_submit, res->bo, LIMA_SUBMIT_BO_READ); + + unsigned start = info->index_size ? ctx->min_index : info->start; + attribute[n++] = res->bo->va + pvb->buffer_offset + pve->src_offset + + start * pvb->stride; + attribute[n++] = (pvb->stride << 11) | + (lima_pipe_format_to_attrib_type(pve->src_format) << 2) | + (util_format_get_nr_components(pve->src_format) - 1); + } + + lima_dump_command_stream_print( + attribute, n * 4, false, "update attribute info at va %x\n", + lima_ctx_buff_va(ctx, lima_ctx_buff_gp_attribute_info, 0)); +} + +static void +lima_update_gp_uniform(struct lima_context *ctx) +{ + struct lima_context_constant_buffer *ccb = + ctx->const_buffer + PIPE_SHADER_VERTEX; + struct lima_vs_shader_state *vs = ctx->vs; + + int size = vs->uniform_pending_offset + vs->constant_size + 32; + void *vs_const_buff = + lima_ctx_buff_alloc(ctx, lima_ctx_buff_gp_uniform, size, true); + + if (ccb->buffer) + memcpy(vs_const_buff, ccb->buffer, ccb->size); + + memcpy(vs_const_buff + vs->uniform_pending_offset, + ctx->viewport.transform.scale, + sizeof(ctx->viewport.transform.scale)); + memcpy(vs_const_buff + vs->uniform_pending_offset + 16, + ctx->viewport.transform.translate, + sizeof(ctx->viewport.transform.translate)); + + if (vs->constant) + memcpy(vs_const_buff + vs->uniform_pending_offset + 32, + vs->constant, vs->constant_size); + + lima_dump_command_stream_print( + vs_const_buff, size, true, + "update gp uniform at va %x\n", + lima_ctx_buff_va(ctx, lima_ctx_buff_gp_uniform, 0)); +} + +static void +lima_update_pp_uniform(struct lima_context *ctx) +{ + const float *const_buff = ctx->const_buffer[PIPE_SHADER_FRAGMENT].buffer; + size_t const_buff_size = ctx->const_buffer[PIPE_SHADER_FRAGMENT].size / sizeof(float); + + if (!const_buff) + return; + + uint16_t *fp16_const_buff = + lima_ctx_buff_alloc(ctx, lima_ctx_buff_pp_uniform, + const_buff_size * sizeof(uint16_t), true); + + uint32_t *array = + lima_ctx_buff_alloc(ctx, lima_ctx_buff_pp_uniform_array, 4, true); + + for (int i = 0; i < const_buff_size; i++) + fp16_const_buff[i] = util_float_to_half(const_buff[i]); + + *array = lima_ctx_buff_va(ctx, lima_ctx_buff_pp_uniform, LIMA_CTX_BUFF_SUBMIT_PP); + + lima_dump_command_stream_print( + fp16_const_buff, const_buff_size * 2, false, "add pp uniform data 
at va %x\n", + lima_ctx_buff_va(ctx, lima_ctx_buff_pp_uniform, 0)); + lima_dump_command_stream_print( + array, 4, false, "add pp uniform info at va %x\n", + lima_ctx_buff_va(ctx, lima_ctx_buff_pp_uniform_array, 0)); +} + +static void +lima_update_varying(struct lima_context *ctx, const struct pipe_draw_info *info) +{ + struct lima_vs_shader_state *vs = ctx->vs; + + uint32_t *varying = + lima_ctx_buff_alloc(ctx, lima_ctx_buff_gp_varying_info, + vs->num_varying * 8, true); + int n = 0; + + /* should be LIMA_SUBMIT_BO_WRITE for GP, but each draw will use + * different part of this bo, so no need to set exclusive constraint */ + lima_ctx_buff_alloc(ctx, lima_ctx_buff_sh_gl_pos, + 4 * 4 * info->count, false); + + /* for gl_Position */ + varying[n++] = + lima_ctx_buff_va(ctx, lima_ctx_buff_sh_gl_pos, + LIMA_CTX_BUFF_SUBMIT_GP | LIMA_CTX_BUFF_SUBMIT_PP); + varying[n++] = 0x8020; + + int offset = 0; + for (int i = 1; i < vs->num_varying; i++) { + struct lima_varying_info *v = vs->varying + i; + int size = v->component_size * 4; + + /* does component_size == 2 need to be 16 aligned? */ + if (v->component_size == 4) + offset = align(offset, 16); + + v->offset = offset; + offset += size; + } + vs->varying_stride = align(offset, 16); + + if (vs->num_varying > 1) + lima_ctx_buff_alloc(ctx, lima_ctx_buff_sh_varying, + vs->varying_stride * info->count, false); + + for (int i = 1; i < vs->num_varying; i++) { + struct lima_varying_info *v = vs->varying + i; + varying[n++] = + lima_ctx_buff_va(ctx, lima_ctx_buff_sh_varying, LIMA_CTX_BUFF_SUBMIT_GP) + + v->offset; + varying[n++] = (vs->varying_stride << 11) | (v->components - 1) | + (v->component_size == 2 ? 0x0C : 0); + } + + lima_dump_command_stream_print( + varying, n * 4, false, "update varying info at va %x\n", + lima_ctx_buff_va(ctx, lima_ctx_buff_gp_varying_info, 0)); +} + +static void +lima_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) +{ + /* check if draw mode and vertex/index count match, + * otherwise gp will hang */ + if (!u_trim_pipe_prim(info->mode, (unsigned*)&info->count)) { + debug_printf("draw mode and vertex/index count mismatch\n"); + return; + } + + struct lima_context *ctx = lima_context(pctx); + + if (!ctx->vs || !ctx->fs) { + debug_warn_once("no shader, skip draw\n"); + return; + } + + if (!lima_update_vs_state(ctx) || !lima_update_fs_state(ctx)) + return; + + lima_dump_command_stream_print( + ctx->vs->bo->map, ctx->vs->shader_size, false, + "add vs at va %x\n", ctx->vs->bo->va); + + lima_dump_command_stream_print( + ctx->fs->bo->map, ctx->fs->shader_size, false, + "add fs at va %x\n", ctx->fs->bo->va); + + lima_submit_add_bo(ctx->gp_submit, ctx->vs->bo, LIMA_SUBMIT_BO_READ); + lima_submit_add_bo(ctx->pp_submit, ctx->fs->bo, LIMA_SUBMIT_BO_READ); + + lima_update_submit_bo(ctx); + + /* Mali Utgard GPU always need min/max index info for index draw, + * compute it if upper layer does not do for us */ + if (info->index_size && info->max_index == ~0u) + u_vbuf_get_minmax_index(pctx, info, &ctx->min_index, &ctx->max_index); + else { + ctx->min_index = info->min_index; + ctx->max_index = info->max_index; + } + + lima_update_gp_attribute_info(ctx, info); + + if ((ctx->dirty & LIMA_CONTEXT_DIRTY_CONST_BUFF && + ctx->const_buffer[PIPE_SHADER_VERTEX].dirty) || + ctx->dirty & LIMA_CONTEXT_DIRTY_VIEWPORT || + ctx->dirty & LIMA_CONTEXT_DIRTY_SHADER_VERT) { + lima_update_gp_uniform(ctx); + ctx->const_buffer[PIPE_SHADER_VERTEX].dirty = false; + } + + lima_update_varying(ctx, info); + + /* If it's zero scissor, don't build vs 
cmd list */ + if (!lima_is_scissor_zero(ctx)) + lima_pack_vs_cmd(ctx, info); + + if (ctx->dirty & LIMA_CONTEXT_DIRTY_CONST_BUFF && + ctx->const_buffer[PIPE_SHADER_FRAGMENT].dirty) { + lima_update_pp_uniform(ctx); + ctx->const_buffer[PIPE_SHADER_FRAGMENT].dirty = false; + } + + if (ctx->dirty & LIMA_CONTEXT_DIRTY_TEXTURES) + lima_update_textures(ctx); + + lima_pack_render_state(ctx, info); + lima_pack_plbu_cmd(ctx, info); + + ctx->dirty = 0; +} + +static void +lima_finish_plbu_cmd(struct lima_context *ctx) +{ + int i = 0; + uint32_t *plbu_cmd = util_dynarray_grow_cap(&ctx->plbu_cmd_array, 2 * 4); + + plbu_cmd[i++] = 0x00000000; + plbu_cmd[i++] = 0x50000000; /* END */ + + ctx->plbu_cmd_array.size += i * 4; +} + +static void +lima_pack_pp_frame_reg(struct lima_context *ctx, uint32_t *frame_reg, + uint32_t *wb_reg) +{ + struct lima_resource *res = lima_resource(ctx->framebuffer.cbuf->texture); + + bool swap_channels = false; + switch (ctx->framebuffer.cbuf->format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + swap_channels = true; + break; + default: + break; + } + + struct lima_context_framebuffer *fb = &ctx->framebuffer; + struct lima_pp_frame_reg *frame = (void *)frame_reg; + struct lima_screen *screen = lima_screen(ctx->base.screen); + frame->render_address = screen->pp_buffer->va + pp_frame_rsw_offset; + frame->flags = 0x02; + frame->clear_value_depth = ctx->clear.depth; + frame->clear_value_stencil = ctx->clear.stencil; + frame->clear_value_color = ctx->clear.color_8pc; + frame->clear_value_color_1 = ctx->clear.color_8pc; + frame->clear_value_color_2 = ctx->clear.color_8pc; + frame->clear_value_color_3 = ctx->clear.color_8pc; + frame->one = 1; + + frame->width = fb->width - 1; + frame->height = fb->height - 1; + + /* frame->fragment_stack_address is overwritten per-pp in the kernel + * by the values of pp_frame.fragment_stack_address[i] */ + + /* These are "stack size" and "stack offset" shifted, + * here they are assumed to be always the same. */ + uint32_t fs_stack_size = ctx->fs ? ctx->fs->stack_size : 0; + frame->fragment_stack_size = fs_stack_size << 16 | fs_stack_size; + + /* related with MSAA and different value when r4p0/r7p0 */ + frame->supersampled_height = fb->height * 2 - 1; + frame->scale = 0xE0C; + + frame->dubya = 0x77; + frame->onscreen = 1; + frame->blocking = (fb->shift_min << 28) | (fb->shift_h << 16) | fb->shift_w; + frame->foureight = 0x8888; + + struct lima_pp_wb_reg *wb = (void *)wb_reg; + wb[0].type = 0x02; /* 1 for depth, stencil */ + wb[0].address = res->bo->va; + wb[0].pixel_format = 0x03; /* BGRA8888 */ + if (res->tiled) { + wb[0].pixel_layout = 0x2; + wb[0].pitch = fb->tiled_w; + } else { + wb[0].pixel_layout = 0x0; + wb[0].pitch = res->levels[0].stride / 8; + } + wb[0].mrt_bits = swap_channels ? 
0x4 : 0x0; +} + +static void +_lima_flush(struct lima_context *ctx, bool end_of_frame) +{ + lima_finish_plbu_cmd(ctx); + + int vs_cmd_size = ctx->vs_cmd_array.size; + int plbu_cmd_size = ctx->plbu_cmd_array.size; + uint32_t vs_cmd_va = 0; + uint32_t plbu_cmd_va; + + if (vs_cmd_size) { + void *vs_cmd = + lima_ctx_buff_alloc(ctx, lima_ctx_buff_gp_vs_cmd, vs_cmd_size, true); + memcpy(vs_cmd, util_dynarray_begin(&ctx->vs_cmd_array), vs_cmd_size); + util_dynarray_clear(&ctx->vs_cmd_array); + vs_cmd_va = lima_ctx_buff_va(ctx, lima_ctx_buff_gp_vs_cmd, + LIMA_CTX_BUFF_SUBMIT_GP); + + lima_dump_command_stream_print( + vs_cmd, vs_cmd_size, false, "flush vs cmd at va %x\n", vs_cmd_va); + } + + void *plbu_cmd = + lima_ctx_buff_alloc(ctx, lima_ctx_buff_gp_plbu_cmd, plbu_cmd_size, true); + memcpy(plbu_cmd, util_dynarray_begin(&ctx->plbu_cmd_array), plbu_cmd_size); + util_dynarray_clear(&ctx->plbu_cmd_array); + plbu_cmd_va = lima_ctx_buff_va(ctx, lima_ctx_buff_gp_plbu_cmd, + LIMA_CTX_BUFF_SUBMIT_GP); + + lima_dump_command_stream_print( + plbu_cmd, plbu_cmd_size, false, "flush plbu cmd at va %x\n", plbu_cmd_va); + + struct lima_screen *screen = lima_screen(ctx->base.screen); + struct drm_lima_gp_frame gp_frame; + struct lima_gp_frame_reg *gp_frame_reg = (void *)gp_frame.frame; + gp_frame_reg->vs_cmd_start = vs_cmd_va; + gp_frame_reg->vs_cmd_end = vs_cmd_va + vs_cmd_size; + gp_frame_reg->plbu_cmd_start = plbu_cmd_va; + gp_frame_reg->plbu_cmd_end = plbu_cmd_va + plbu_cmd_size; + gp_frame_reg->tile_heap_start = screen->gp_buffer->va + gp_tile_heap_offset; + gp_frame_reg->tile_heap_end = screen->gp_buffer->va + gp_buffer_size; + + lima_dump_command_stream_print( + &gp_frame, sizeof(gp_frame), false, "add gp frame\n"); + + if (!lima_submit_start(ctx->gp_submit, &gp_frame, sizeof(gp_frame))) + fprintf(stderr, "gp submit error\n"); + + if (lima_dump_command_stream) { + if (lima_submit_wait(ctx->gp_submit, PIPE_TIMEOUT_INFINITE)) { + if (ctx->buffer_state[lima_ctx_buff_sh_gl_pos].res) { + float *pos = lima_ctx_buff_map(ctx, lima_ctx_buff_sh_gl_pos); + lima_dump_command_stream_print( + pos, 4 * 4 * 16, true, "gl_pos dump at va %x\n", + lima_ctx_buff_va(ctx, lima_ctx_buff_sh_gl_pos, 0)); + } + + uint32_t *plb = lima_bo_map(ctx->plb[ctx->plb_index]); + lima_dump_command_stream_print( + plb, LIMA_CTX_PLB_BLK_SIZE, false, "plb dump at va %x\n", + ctx->plb[ctx->plb_index]->va); + } + else { + fprintf(stderr, "gp submit wait error\n"); + exit(1); + } + } + + struct lima_pp_stream_state *ps = &ctx->pp_stream; + if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) { + struct drm_lima_m400_pp_frame pp_frame = {0}; + lima_pack_pp_frame_reg(ctx, pp_frame.frame, pp_frame.wb); + pp_frame.num_pp = screen->num_pp; + + for (int i = 0; i < screen->num_pp; i++) { + pp_frame.plbu_array_address[i] = ps->bo->va + ps->bo_offset + ps->offset[i]; + pp_frame.fragment_stack_address[i] = screen->pp_buffer->va + + pp_stack_offset + pp_stack_pp_size * i; + } + + lima_dump_command_stream_print( + &pp_frame, sizeof(pp_frame), false, "add pp frame\n"); + + if (!lima_submit_start(ctx->pp_submit, &pp_frame, sizeof(pp_frame))) + fprintf(stderr, "pp submit error\n"); + } + else { + struct drm_lima_m450_pp_frame pp_frame = {0}; + lima_pack_pp_frame_reg(ctx, pp_frame.frame, pp_frame.wb); + pp_frame.num_pp = screen->num_pp; + + for (int i = 0; i < screen->num_pp; i++) + pp_frame.fragment_stack_address[i] = screen->pp_buffer->va + + pp_stack_offset + pp_stack_pp_size * i; + + if (ps->bo) { + for (int i = 0; i < screen->num_pp; i++) + 
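/* each PP core reads its own slice of the PLB stream prepared by lima_update_damage_pp_stream() / lima_update_full_pp_stream() */ +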
pp_frame.plbu_array_address[i] = ps->bo->va + ps->bo_offset + ps->offset[i]; + } + else { + pp_frame.use_dlbu = true; + + struct lima_context_framebuffer *fb = &ctx->framebuffer; + pp_frame.dlbu_regs[0] = ctx->plb[ctx->plb_index]->va; + pp_frame.dlbu_regs[1] = ((fb->tiled_h - 1) << 16) | (fb->tiled_w - 1); + unsigned s = util_logbase2(LIMA_CTX_PLB_BLK_SIZE) - 7; + pp_frame.dlbu_regs[2] = (s << 28) | (fb->shift_h << 16) | fb->shift_w; + pp_frame.dlbu_regs[3] = ((fb->tiled_h - 1) << 24) | ((fb->tiled_w - 1) << 16); + } + + lima_dump_command_stream_print( + &pp_frame, sizeof(pp_frame), false, "add pp frame\n"); + + if (!lima_submit_start(ctx->pp_submit, &pp_frame, sizeof(pp_frame))) + fprintf(stderr, "pp submit error\n"); + } + + if (lima_dump_command_stream) { + if (!lima_submit_wait(ctx->pp_submit, PIPE_TIMEOUT_INFINITE)) { + fprintf(stderr, "pp wait error\n"); + exit(1); + } + } + + ctx->plb_index = (ctx->plb_index + 1) % lima_ctx_num_plb; + + /* this surface may need reload when next draw if not end of frame */ + struct lima_surface *surf = lima_surface(ctx->framebuffer.cbuf); + surf->reload = !end_of_frame; +} + +void +lima_flush(struct lima_context *ctx) +{ + if (!lima_ctx_dirty(ctx)) + return; + + _lima_flush(ctx, false); +} + +static void +lima_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, + unsigned flags) +{ + struct lima_context *ctx = lima_context(pctx); + if (!lima_ctx_dirty(ctx)) + return; + + _lima_flush(ctx, flags & PIPE_FLUSH_END_OF_FRAME); + + if (fence) { + int fd; + if (lima_submit_get_out_sync(ctx->pp_submit, &fd)) + *fence = lima_fence_create(fd); + } +} + +void +lima_draw_init(struct lima_context *ctx) +{ + ctx->base.clear = lima_clear; + ctx->base.draw_vbo = lima_draw_vbo; + ctx->base.flush = lima_pipe_flush; +} diff --git a/src/gallium/drivers/lima/lima_fence.c b/src/gallium/drivers/lima/lima_fence.c new file mode 100644 index 00000000000..50114c24c2b --- /dev/null +++ b/src/gallium/drivers/lima/lima_fence.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2018-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include +#include + +#include +#include + +#include "drm-uapi/lima_drm.h" + +#include "lima_screen.h" +#include "lima_context.h" +#include "lima_fence.h" +#include "lima_submit.h" + +struct pipe_fence_handle { + struct pipe_reference reference; + int fd; +}; + +static void +lima_create_fence_fd(struct pipe_context *pctx, + struct pipe_fence_handle **fence, + int fd, enum pipe_fd_type type) +{ + assert(type == PIPE_FD_TYPE_NATIVE_SYNC); + *fence = lima_fence_create(fcntl(fd, F_DUPFD_CLOEXEC, 3)); +} + +static void +lima_fence_server_sync(struct pipe_context *pctx, + struct pipe_fence_handle *fence) +{ + struct lima_context *ctx = lima_context(pctx); + + lima_submit_add_in_sync(ctx->gp_submit, fence->fd); +} + +void lima_fence_context_init(struct lima_context *ctx) +{ + ctx->base.create_fence_fd = lima_create_fence_fd; + ctx->base.fence_server_sync = lima_fence_server_sync; +} + +struct pipe_fence_handle * +lima_fence_create(int fd) +{ + struct pipe_fence_handle *fence; + + fence = CALLOC_STRUCT(pipe_fence_handle); + if (!fence) + return NULL; + + pipe_reference_init(&fence->reference, 1); + fence->fd = fd; + + return fence; +} + +static int +lima_fence_get_fd(struct pipe_screen *pscreen, + struct pipe_fence_handle *fence) +{ + return fcntl(fence->fd, F_DUPFD_CLOEXEC, 3); +} + +static void +lima_fence_destroy(struct pipe_fence_handle *fence) +{ + if (fence->fd >= 0) + close(fence->fd); + FREE(fence); +} + +static void +lima_fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + if (pipe_reference(&(*ptr)->reference, &fence->reference)) + lima_fence_destroy(*ptr); + *ptr = fence; +} + +static boolean +lima_fence_finish(struct pipe_screen *pscreen, struct pipe_context *pctx, + struct pipe_fence_handle *fence, uint64_t timeout) +{ + return !sync_wait(fence->fd, timeout / 1000000); +} + +void +lima_fence_screen_init(struct lima_screen *screen) +{ + screen->base.fence_reference = lima_fence_reference; + screen->base.fence_finish = lima_fence_finish; + screen->base.fence_get_fd = lima_fence_get_fd; +} diff --git a/src/gallium/drivers/lima/lima_fence.h b/src/gallium/drivers/lima/lima_fence.h new file mode 100644 index 00000000000..e92884a3897 --- /dev/null +++ b/src/gallium/drivers/lima/lima_fence.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2018-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef H_LIMA_FENCE +#define H_LIMA_FENCE + +struct pipe_fence_handle; +struct lima_context; +struct lima_screen; + +struct pipe_fence_handle *lima_fence_create(int fd); +void lima_fence_screen_init(struct lima_screen *screen); +void lima_fence_context_init(struct lima_context *ctx); + +#endif diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c new file mode 100644 index 00000000000..1b6d65a2dba --- /dev/null +++ b/src/gallium/drivers/lima/lima_program.c @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "util/u_memory.h" +#include "util/ralloc.h" +#include "util/u_debug.h" + +#include "tgsi/tgsi_dump.h" +#include "compiler/nir/nir.h" +#include "nir/tgsi_to_nir.h" + +#include "pipe/p_state.h" + +#include "lima_screen.h" +#include "lima_context.h" +#include "lima_program.h" +#include "lima_bo.h" +#include "ir/lima_ir.h" + +static const nir_shader_compiler_options vs_nir_options = { + .lower_ffma = true, + .lower_fpow = true, + .lower_ffract = true, + .lower_fdiv = true, + .lower_fsqrt = true, + .lower_sub = true, + .lower_flrp32 = true, + .lower_flrp64 = true, + /* could be implemented by clamp */ + .lower_fsat = true, +}; + +static const nir_shader_compiler_options fs_nir_options = { + .lower_fpow = true, + .lower_fdiv = true, + .lower_sub = true, + .lower_flrp32 = true, + .lower_flrp64 = true, +}; + +const void * +lima_program_get_compiler_options(enum pipe_shader_type shader) +{ + switch (shader) { + case PIPE_SHADER_VERTEX: + return &vs_nir_options; + case PIPE_SHADER_FRAGMENT: + return &fs_nir_options; + default: + return NULL; + } +} + +static int +type_size(const struct glsl_type *type) +{ + return glsl_count_attribute_slots(type, false); +} + +static void +lima_program_optimize_vs_nir(struct nir_shader *s) +{ + bool progress; + + NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, 0); + NIR_PASS_V(s, nir_lower_regs_to_ssa); + NIR_PASS_V(s, nir_lower_load_const_to_scalar); + NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar); + NIR_PASS_V(s, nir_lower_io_to_scalar, + nir_var_shader_in|nir_var_shader_out); + + do { + progress = false; + + NIR_PASS_V(s, nir_lower_vars_to_ssa); + NIR_PASS(progress, s, nir_lower_alu_to_scalar); + NIR_PASS(progress, s, nir_lower_phis_to_scalar); + NIR_PASS(progress, s, nir_copy_prop); + NIR_PASS(progress, s, nir_opt_remove_phis); + NIR_PASS(progress, s, 
nir_opt_dce); + NIR_PASS(progress, s, nir_opt_dead_cf); + NIR_PASS(progress, s, nir_opt_cse); + NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); + NIR_PASS(progress, s, nir_opt_algebraic); + NIR_PASS(progress, s, nir_opt_constant_folding); + NIR_PASS(progress, s, nir_opt_undef); + NIR_PASS(progress, s, nir_opt_loop_unroll, + nir_var_shader_in | + nir_var_shader_out | + nir_var_function_temp); + } while (progress); + + NIR_PASS_V(s, nir_lower_locals_to_regs); + NIR_PASS_V(s, nir_convert_from_ssa, true); + NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp); + nir_sweep(s); +} + +static void +lima_program_optimize_fs_nir(struct nir_shader *s) +{ + bool progress; + + NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, 0); + NIR_PASS_V(s, nir_lower_regs_to_ssa); + + do { + progress = false; + + NIR_PASS_V(s, nir_lower_vars_to_ssa); + //NIR_PASS(progress, s, nir_lower_alu_to_scalar); + NIR_PASS(progress, s, nir_lower_phis_to_scalar); + NIR_PASS(progress, s, nir_copy_prop); + NIR_PASS(progress, s, nir_opt_remove_phis); + NIR_PASS(progress, s, nir_opt_dce); + NIR_PASS(progress, s, nir_opt_dead_cf); + NIR_PASS(progress, s, nir_opt_cse); + NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); + NIR_PASS(progress, s, nir_opt_algebraic); + NIR_PASS(progress, s, nir_opt_constant_folding); + NIR_PASS(progress, s, nir_opt_undef); + NIR_PASS(progress, s, nir_opt_loop_unroll, + nir_var_shader_in | + nir_var_shader_out | + nir_var_function_temp); + } while (progress); + + /* Lower modifiers */ + NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_all_source_mods); + NIR_PASS_V(s, nir_copy_prop); + NIR_PASS_V(s, nir_opt_dce); + + NIR_PASS_V(s, nir_lower_locals_to_regs); + NIR_PASS_V(s, nir_convert_from_ssa, true); + NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp); + + NIR_PASS_V(s, nir_move_vec_src_uses_to_dest); + NIR_PASS_V(s, nir_lower_vec_to_movs); + + nir_sweep(s); +} + +static void * +lima_create_fs_state(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct lima_screen *screen = lima_screen(pctx->screen); + struct lima_fs_shader_state *so = rzalloc(NULL, struct lima_fs_shader_state); + + if (!so) + return NULL; + + nir_shader *nir; + if (cso->type == PIPE_SHADER_IR_NIR) + nir = cso->ir.nir; + else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + + nir = tgsi_to_nir(cso->tokens, pctx->screen); + } + + lima_program_optimize_fs_nir(nir); + + if (lima_debug & LIMA_DEBUG_PP) + nir_print_shader(nir, stdout); + + if (!ppir_compile_nir(so, nir, screen->pp_ra)) { + ralloc_free(so); + return NULL; + } + + return so; +} + +static void +lima_bind_fs_state(struct pipe_context *pctx, void *hwcso) +{ + struct lima_context *ctx = lima_context(pctx); + + ctx->fs = hwcso; + ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_FRAG; +} + +static void +lima_delete_fs_state(struct pipe_context *pctx, void *hwcso) +{ + struct lima_fs_shader_state *so = hwcso; + + if (so->bo) + lima_bo_free(so->bo); + + ralloc_free(so); +} + +bool +lima_update_vs_state(struct lima_context *ctx) +{ + struct lima_vs_shader_state *vs = ctx->vs; + if (!vs->bo) { + struct lima_screen *screen = lima_screen(ctx->base.screen); + vs->bo = lima_bo_create(screen, vs->shader_size, 0); + if (!vs->bo) { + fprintf(stderr, "lima: create vs shader bo fail\n"); + return false; + } + + memcpy(lima_bo_map(vs->bo), vs->shader, vs->shader_size); + ralloc_free(vs->shader); + vs->shader = NULL; + } + + return true; +} + +bool +lima_update_fs_state(struct lima_context *ctx) +{ + struct lima_fs_shader_state 
*fs = ctx->fs; + if (!fs->bo) { + struct lima_screen *screen = lima_screen(ctx->base.screen); + fs->bo = lima_bo_create(screen, fs->shader_size, 0); + if (!fs->bo) { + fprintf(stderr, "lima: create fs shader bo fail\n"); + return false; + } + + memcpy(lima_bo_map(fs->bo), fs->shader, fs->shader_size); + ralloc_free(fs->shader); + fs->shader = NULL; + } + + return true; +} + +static void * +lima_create_vs_state(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct lima_vs_shader_state *so = rzalloc(NULL, struct lima_vs_shader_state); + + if (!so) + return NULL; + + nir_shader *nir; + if (cso->type == PIPE_SHADER_IR_NIR) + nir = cso->ir.nir; + else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + + nir = tgsi_to_nir(cso->tokens, pctx->screen); + } + + lima_program_optimize_vs_nir(nir); + + if (lima_debug & LIMA_DEBUG_GP) + nir_print_shader(nir, stdout); + + if (!gpir_compile_nir(so, nir)) { + ralloc_free(so); + return NULL; + } + + return so; +} + +static void +lima_bind_vs_state(struct pipe_context *pctx, void *hwcso) +{ + struct lima_context *ctx = lima_context(pctx); + + ctx->vs = hwcso; + ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_VERT; +} + +static void +lima_delete_vs_state(struct pipe_context *pctx, void *hwcso) +{ + struct lima_vs_shader_state *so = hwcso; + + if (so->bo) + lima_bo_free(so->bo); + + ralloc_free(so); +} + +void +lima_program_init(struct lima_context *ctx) +{ + ctx->base.create_fs_state = lima_create_fs_state; + ctx->base.bind_fs_state = lima_bind_fs_state; + ctx->base.delete_fs_state = lima_delete_fs_state; + + ctx->base.create_vs_state = lima_create_vs_state; + ctx->base.bind_vs_state = lima_bind_vs_state; + ctx->base.delete_vs_state = lima_delete_vs_state; +} diff --git a/src/gallium/drivers/lima/lima_program.h b/src/gallium/drivers/lima/lima_program.h new file mode 100644 index 00000000000..9b5e676554c --- /dev/null +++ b/src/gallium/drivers/lima/lima_program.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef H_LIMA_PROGRAM +#define H_LIMA_PROGRAM + +#include "pipe/p_defines.h" + +const void *lima_program_get_compiler_options(enum pipe_shader_type shader); + +bool lima_update_vs_state(struct lima_context *ctx); +bool lima_update_fs_state(struct lima_context *ctx); + +#endif diff --git a/src/gallium/drivers/lima/lima_query.c b/src/gallium/drivers/lima/lima_query.c new file mode 100644 index 00000000000..2590ac1bad8 --- /dev/null +++ b/src/gallium/drivers/lima/lima_query.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +/** + * Stub support for occlusion queries. + * + * Since we expose support for GL 2.0, we have to expose occlusion queries, + * but the spec allows you to expose 0 query counter bits, so we just return 0 + * as the result of all our queries. + */ + +#include "util/u_debug.h" + +#include "lima_context.h" + +struct lima_query +{ + uint8_t pad; +}; + +static struct pipe_query * +lima_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +{ + struct lima_query *query = calloc(1, sizeof(*query)); + + /* Note that struct pipe_query isn't actually defined anywhere. 
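Gallium treats it as an opaque handle, so any heap allocation is enough for these stub queries.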
*/ + return (struct pipe_query *)query; +} + +static void +lima_destroy_query(struct pipe_context *ctx, struct pipe_query *query) +{ + free(query); +} + +static boolean +lima_begin_query(struct pipe_context *ctx, struct pipe_query *query) +{ + return true; +} + +static bool +lima_end_query(struct pipe_context *ctx, struct pipe_query *query) +{ + return true; +} + +static boolean +lima_get_query_result(struct pipe_context *ctx, struct pipe_query *query, + boolean wait, union pipe_query_result *vresult) +{ + uint64_t *result = &vresult->u64; + + *result = 0; + + return true; +} + +static void +lima_set_active_query_state(struct pipe_context *pipe, boolean enable) +{ + +} + +void +lima_query_init(struct lima_context *pctx) +{ + pctx->base.create_query = lima_create_query; + pctx->base.destroy_query = lima_destroy_query; + pctx->base.begin_query = lima_begin_query; + pctx->base.end_query = lima_end_query; + pctx->base.get_query_result = lima_get_query_result; + pctx->base.set_active_query_state = lima_set_active_query_state; +} + diff --git a/src/gallium/drivers/lima/lima_resource.c b/src/gallium/drivers/lima/lima_resource.c new file mode 100644 index 00000000000..508b58a9c17 --- /dev/null +++ b/src/gallium/drivers/lima/lima_resource.c @@ -0,0 +1,589 @@ +/* + * Copyright (c) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_debug.h" +#include "util/u_transfer.h" +#include "util/u_surface.h" +#include "util/hash_table.h" +#include "util/u_drm.h" +#include "renderonly/renderonly.h" + +#include "state_tracker/drm_driver.h" + +#include "drm-uapi/drm_fourcc.h" +#include "drm-uapi/lima_drm.h" + +#include "lima_screen.h" +#include "lima_context.h" +#include "lima_resource.h" +#include "lima_bo.h" +#include "lima_util.h" +#include "lima_tiling.h" + +static struct pipe_resource * +lima_resource_create_scanout(struct pipe_screen *pscreen, + const struct pipe_resource *templat, + unsigned width, unsigned height) +{ + struct lima_screen *screen = lima_screen(pscreen); + struct renderonly_scanout *scanout; + struct winsys_handle handle; + struct pipe_resource *pres; + + struct pipe_resource scanout_templat = *templat; + scanout_templat.width0 = width; + scanout_templat.height0 = height; + scanout_templat.screen = pscreen; + + scanout = renderonly_scanout_for_resource(&scanout_templat, + screen->ro, &handle); + if (!scanout) + return NULL; + + assert(handle.type == WINSYS_HANDLE_TYPE_FD); + pres = pscreen->resource_from_handle(pscreen, templat, &handle, + PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE); + + close(handle.handle); + if (!pres) { + renderonly_scanout_destroy(scanout, screen->ro); + return NULL; + } + + struct lima_resource *res = lima_resource(pres); + res->scanout = scanout; + + return pres; +} + +static uint32_t +setup_miptree(struct lima_resource *res, + unsigned width0, unsigned height0, + bool should_align_dimensions) +{ + struct pipe_resource *pres = &res->base; + unsigned level; + unsigned width = width0; + unsigned height = height0; + unsigned depth = pres->depth0; + uint32_t size = 0; + + for (level = 0; level <= pres->last_level; level++) { + uint32_t actual_level_size; + uint32_t stride; + unsigned aligned_width; + unsigned aligned_height; + + if (should_align_dimensions) { + aligned_width = align(width, 16); + aligned_height = align(height, 16); + } else { + aligned_width = width; + aligned_height = height; + } + + stride = util_format_get_stride(pres->format, aligned_width); + actual_level_size = stride * + util_format_get_nblocksy(pres->format, aligned_height) * + pres->array_size * depth; + + res->levels[level].width = aligned_width; + res->levels[level].stride = stride; + res->levels[level].offset = size; + + /* The start address of each level <= 10 must be 64-aligned + * in order to be able to pass the addresses + * to the hardware. 
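+ * This is presumably because the texture descriptor only stores the upper bits of each level's address.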
+ * The start addresses of level 11 and level 12 are passed + * implicitely: they start at an offset of respectively + * 0x0400 and 0x0800 from the start address of level 10 */ + if (level < 10) + size += align(actual_level_size, 64); + else if (level != pres->last_level) + size += 0x0400; + else + size += actual_level_size; /* Save some memory */ + + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + + return size; +} + +static struct pipe_resource * +lima_resource_create_bo(struct pipe_screen *pscreen, + const struct pipe_resource *templat, + unsigned width, unsigned height, + bool should_align_dimensions) +{ + struct lima_screen *screen = lima_screen(pscreen); + struct lima_resource *res; + struct pipe_resource *pres; + + res = CALLOC_STRUCT(lima_resource); + if (!res) + return NULL; + + res->base = *templat; + res->base.screen = pscreen; + pipe_reference_init(&res->base.reference, 1); + + pres = &res->base; + + uint32_t size = setup_miptree(res, width, height, should_align_dimensions); + size = align(size, LIMA_PAGE_SIZE); + + res->bo = lima_bo_create(screen, size, 0); + if (!res->bo) { + FREE(res); + return NULL; + } + + return pres; +} + +static struct pipe_resource * +_lima_resource_create_with_modifiers(struct pipe_screen *pscreen, + const struct pipe_resource *templat, + const uint64_t *modifiers, + int count) +{ + struct lima_screen *screen = lima_screen(pscreen); + bool should_tile = false; + unsigned width, height; + bool should_align_dimensions; + + /* VBOs/PBOs are untiled (and 1 height). */ + if (templat->target == PIPE_BUFFER) + should_tile = false; + + if (templat->bind & (PIPE_BIND_LINEAR | PIPE_BIND_SCANOUT)) + should_tile = false; + + /* if linear buffer is not allowed, alloc fail */ + if (!should_tile && !drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count)) + return NULL; + + if (should_tile || (templat->bind & PIPE_BIND_RENDER_TARGET)) { + should_align_dimensions = true; + width = align(templat->width0, 16); + height = align(templat->height0, 16); + } + else { + should_align_dimensions = false; + width = templat->width0; + height = templat->height0; + } + + struct pipe_resource *pres; + if (screen->ro && (templat->bind & PIPE_BIND_SCANOUT)) + pres = lima_resource_create_scanout(pscreen, templat, width, height); + else + pres = lima_resource_create_bo(pscreen, templat, width, height, + should_align_dimensions); + + if (pres) { + struct lima_resource *res = lima_resource(pres); + res->tiled = should_tile; + + debug_printf("%s: pres=%p width=%u height=%u depth=%u target=%d " + "bind=%x usage=%d tile=%d last_level=%d\n", __func__, + pres, pres->width0, pres->height0, pres->depth0, + pres->target, pres->bind, pres->usage, should_tile, templat->last_level); + } + return pres; +} + +static struct pipe_resource * +lima_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *templat) +{ + static const uint64_t modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + }; + return _lima_resource_create_with_modifiers(pscreen, templat, modifiers, ARRAY_SIZE(modifiers)); +} + +static struct pipe_resource * +lima_resource_create_with_modifiers(struct pipe_screen *pscreen, + const struct pipe_resource *templat, + const uint64_t *modifiers, + int count) +{ + struct pipe_resource tmpl = *templat; + + /* gbm_bo_create_with_modifiers & gbm_surface_create_with_modifiers + * don't have usage parameter, but buffer created by these functions + * may be used for scanout. 
So we assume buffer created by this + * function always enable scanout if linear modifier is permitted. + */ + if (drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count)) + tmpl.bind |= PIPE_BIND_SCANOUT; + + return _lima_resource_create_with_modifiers(pscreen, &tmpl, modifiers, count); +} + +static void +lima_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *pres) +{ + struct lima_screen *screen = lima_screen(pscreen); + struct lima_resource *res = lima_resource(pres); + + if (res->bo) + lima_bo_free(res->bo); + + if (res->scanout) + renderonly_scanout_destroy(res->scanout, screen->ro); + + FREE(res); +} + +static struct pipe_resource * +lima_resource_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *templat, + struct winsys_handle *handle, unsigned usage) +{ + struct lima_resource *res; + struct lima_screen *screen = lima_screen(pscreen); + + res = CALLOC_STRUCT(lima_resource); + if (!res) + return NULL; + + struct pipe_resource *pres = &res->base; + *pres = *templat; + pres->screen = pscreen; + pipe_reference_init(&pres->reference, 1); + res->levels[0].offset = 0; + res->levels[0].stride = handle->stride; + + res->bo = lima_bo_import(screen, handle); + if (!res->bo) { + FREE(res); + return NULL; + } + + /* check alignment for the buffer */ + if (pres->bind & PIPE_BIND_RENDER_TARGET) { + unsigned width, height, stride, size; + + width = align(pres->width0, 16); + height = align(pres->height0, 16); + stride = util_format_get_stride(pres->format, width); + size = util_format_get_2d_size(pres->format, stride, height); + + if (res->levels[0].stride != stride || res->bo->size < size) { + debug_error("import buffer not properly aligned\n"); + goto err_out; + } + + res->levels[0].width = width; + } + else + res->levels[0].width = pres->width0; + + handle->modifier = DRM_FORMAT_MOD_LINEAR; + res->tiled = false; + + return pres; + +err_out: + lima_resource_destroy(pscreen, pres); + return NULL; +} + +static boolean +lima_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_context *pctx, + struct pipe_resource *pres, + struct winsys_handle *handle, unsigned usage) +{ + struct lima_screen *screen = lima_screen(pscreen); + struct lima_resource *res = lima_resource(pres); + + handle->modifier = DRM_FORMAT_MOD_LINEAR; + + if (handle->type == WINSYS_HANDLE_TYPE_KMS && screen->ro && + renderonly_get_handle(res->scanout, handle)) + return TRUE; + + if (!lima_bo_export(res->bo, handle)) + return FALSE; + + handle->stride = res->levels[0].stride; + return TRUE; +} + +void +lima_resource_screen_init(struct lima_screen *screen) +{ + screen->base.resource_create = lima_resource_create; + screen->base.resource_create_with_modifiers = lima_resource_create_with_modifiers; + screen->base.resource_from_handle = lima_resource_from_handle; + screen->base.resource_destroy = lima_resource_destroy; + screen->base.resource_get_handle = lima_resource_get_handle; +} + +static struct pipe_surface * +lima_surface_create(struct pipe_context *pctx, + struct pipe_resource *pres, + const struct pipe_surface *surf_tmpl) +{ + struct lima_surface *surf = CALLOC_STRUCT(lima_surface); + + if (!surf) + return NULL; + + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); + + struct pipe_surface *psurf = &surf->base; + unsigned level = surf_tmpl->u.tex.level; + + pipe_reference_init(&psurf->reference, 1); + pipe_resource_reference(&psurf->texture, pres); + + psurf->context = pctx; + psurf->format = surf_tmpl->format; + psurf->width = u_minify(pres->width0, level); + 
psurf->height = u_minify(pres->height0, level); + psurf->u.tex.level = level; + psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + + surf->tiled_w = align(psurf->width, 16) >> 4; + surf->tiled_h = align(psurf->height, 16) >> 4; + + struct lima_context *ctx = lima_context(pctx); + if (ctx->plb_pp_stream) { + struct lima_ctx_plb_pp_stream_key key = { + .tiled_w = surf->tiled_w, + .tiled_h = surf->tiled_h, + }; + + for (int i = 0; i < lima_ctx_num_plb; i++) { + key.plb_index = i; + + struct hash_entry *entry = + _mesa_hash_table_search(ctx->plb_pp_stream, &key); + if (entry) { + struct lima_ctx_plb_pp_stream *s = entry->data; + s->refcnt++; + } + else { + struct lima_ctx_plb_pp_stream *s = + ralloc(ctx->plb_pp_stream, struct lima_ctx_plb_pp_stream); + s->key.plb_index = i; + s->key.tiled_w = surf->tiled_w; + s->key.tiled_h = surf->tiled_h; + s->refcnt = 1; + s->bo = NULL; + _mesa_hash_table_insert(ctx->plb_pp_stream, &s->key, s); + } + } + } + + return &surf->base; +} + +static void +lima_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf) +{ + struct lima_surface *surf = lima_surface(psurf); + /* psurf->context may be not equal with pctx (i.e. glxinfo) */ + struct lima_context *ctx = lima_context(psurf->context); + + if (ctx->plb_pp_stream) { + struct lima_ctx_plb_pp_stream_key key = { + .tiled_w = surf->tiled_w, + .tiled_h = surf->tiled_h, + }; + + for (int i = 0; i < lima_ctx_num_plb; i++) { + key.plb_index = i; + + struct hash_entry *entry = + _mesa_hash_table_search(ctx->plb_pp_stream, &key); + struct lima_ctx_plb_pp_stream *s = entry->data; + if (--s->refcnt == 0) { + if (s->bo) + lima_bo_free(s->bo); + _mesa_hash_table_remove(ctx->plb_pp_stream, entry); + ralloc_free(s); + } + } + } + + pipe_resource_reference(&psurf->texture, NULL); + FREE(surf); +} + +static void * +lima_transfer_map(struct pipe_context *pctx, + struct pipe_resource *pres, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **pptrans) +{ + struct lima_context *ctx = lima_context(pctx); + struct lima_resource *res = lima_resource(pres); + struct lima_bo *bo = res->bo; + struct lima_transfer *trans; + struct pipe_transfer *ptrans; + + /* No direct mappings of tiled, since we need to manually + * tile/untile. + */ + if (res->tiled && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) + return NULL; + + /* use once buffers are made sure to not read/write overlapped + * range, so no need to sync */ + if (pres->usage != PIPE_USAGE_STREAM) { + if (usage & PIPE_TRANSFER_READ_WRITE) { + if (lima_need_flush(ctx, bo, usage & PIPE_TRANSFER_WRITE)) + lima_flush(ctx); + + unsigned op = usage & PIPE_TRANSFER_WRITE ? 
+ LIMA_GEM_WAIT_WRITE : LIMA_GEM_WAIT_READ; + lima_bo_wait(bo, op, PIPE_TIMEOUT_INFINITE); + } + } + + if (!lima_bo_map(bo)) + return NULL; + + trans = slab_alloc(&ctx->transfer_pool); + if (!trans) + return NULL; + + memset(trans, 0, sizeof(*trans)); + ptrans = &trans->base; + + pipe_resource_reference(&ptrans->resource, pres); + ptrans->level = level; + ptrans->usage = usage; + ptrans->box = *box; + + *pptrans = ptrans; + + if (res->tiled) { + ptrans->stride = util_format_get_stride(pres->format, ptrans->box.width); + ptrans->layer_stride = ptrans->stride * ptrans->box.height; + + trans->staging = malloc(ptrans->stride * ptrans->box.height * ptrans->box.depth); + + if (usage & PIPE_TRANSFER_READ) + lima_load_tiled_image(trans->staging, bo->map + res->levels[level].offset, + &ptrans->box, + ptrans->stride, + res->levels[level].stride, + util_format_get_blocksize(pres->format)); + + return trans->staging; + } else { + ptrans->stride = res->levels[level].stride; + ptrans->layer_stride = ptrans->stride * box->height; + + return bo->map + res->levels[level].offset + + box->z * ptrans->layer_stride + + box->y / util_format_get_blockheight(pres->format) * ptrans->stride + + box->x / util_format_get_blockwidth(pres->format) * + util_format_get_blocksize(pres->format); + } +} + +static void +lima_transfer_flush_region(struct pipe_context *pctx, + struct pipe_transfer *ptrans, + const struct pipe_box *box) +{ + +} + +static void +lima_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *ptrans) +{ + struct lima_context *ctx = lima_context(pctx); + struct lima_transfer *trans = lima_transfer(ptrans); + struct lima_resource *res = lima_resource(ptrans->resource); + struct lima_bo *bo = res->bo; + struct pipe_resource *pres; + + if (trans->staging) { + pres = &res->base; + if (ptrans->usage & PIPE_TRANSFER_WRITE) + lima_store_tiled_image(bo->map + res->levels[ptrans->level].offset, trans->staging, + &ptrans->box, + res->levels[ptrans->level].stride, + ptrans->stride, + util_format_get_blocksize(pres->format)); + free(trans->staging); + } + + pipe_resource_reference(&ptrans->resource, NULL); + slab_free(&ctx->transfer_pool, trans); +} + +static void +lima_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) +{ + debug_error("lima_blit not implemented\n"); +} + +static void +lima_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource) +{ + +} + +void +lima_resource_context_init(struct lima_context *ctx) +{ + ctx->base.create_surface = lima_surface_create; + ctx->base.surface_destroy = lima_surface_destroy; + + /* TODO: optimize these functions to read/write data directly + * from/to target instead of creating a staging memory for tiled + * buffer indirectly + */ + ctx->base.buffer_subdata = u_default_buffer_subdata; + ctx->base.texture_subdata = u_default_texture_subdata; + ctx->base.resource_copy_region = util_resource_copy_region; + + ctx->base.blit = lima_blit; + + ctx->base.transfer_map = lima_transfer_map; + ctx->base.transfer_flush_region = lima_transfer_flush_region; + ctx->base.transfer_unmap = lima_transfer_unmap; + + ctx->base.flush_resource = lima_flush_resource; +} diff --git a/src/gallium/drivers/lima/lima_resource.h b/src/gallium/drivers/lima/lima_resource.h new file mode 100644 index 00000000000..ba88b0696fe --- /dev/null +++ b/src/gallium/drivers/lima/lima_resource.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef H_LIMA_RESOURCE +#define H_LIMA_RESOURCE + +#include "pipe/p_state.h" + +/* max texture size is 4096x4096 */ +#define LIMA_MAX_MIP_LEVELS 13 + +struct lima_screen; + +struct lima_resource_level { + uint32_t width; + uint32_t stride; + uint32_t offset; +}; + +struct lima_resource { + struct pipe_resource base; + + struct renderonly_scanout *scanout; + struct lima_bo *bo; + bool tiled; + + struct lima_resource_level levels[LIMA_MAX_MIP_LEVELS]; +}; + +struct lima_surface { + struct pipe_surface base; + int tiled_w, tiled_h; + bool reload; +}; + +struct lima_transfer { + struct pipe_transfer base; + void *staging; +}; + +static inline struct lima_resource * +lima_resource(struct pipe_resource *res) +{ + return (struct lima_resource *)res; +} + +static inline struct lima_surface * +lima_surface(struct pipe_surface *surf) +{ + return (struct lima_surface *)surf; +} + +static inline struct lima_transfer * +lima_transfer(struct pipe_transfer *trans) +{ + return (struct lima_transfer *)trans; +} + +void +lima_resource_screen_init(struct lima_screen *screen); + +void +lima_resource_context_init(struct lima_context *ctx); + +#endif diff --git a/src/gallium/drivers/lima/lima_screen.c b/src/gallium/drivers/lima/lima_screen.c new file mode 100644 index 00000000000..8615003cb81 --- /dev/null +++ b/src/gallium/drivers/lima/lima_screen.c @@ -0,0 +1,546 @@ +/* + * Copyright (c) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include + +#include "util/ralloc.h" +#include "util/u_debug.h" +#include "util/u_screen.h" +#include "renderonly/renderonly.h" + +#include "drm-uapi/drm_fourcc.h" +#include "drm-uapi/lima_drm.h" + +#include "lima_screen.h" +#include "lima_context.h" +#include "lima_resource.h" +#include "lima_program.h" +#include "lima_bo.h" +#include "lima_fence.h" +#include "ir/lima_ir.h" + +#include "xf86drm.h" + +static void +lima_screen_destroy(struct pipe_screen *pscreen) +{ + struct lima_screen *screen = lima_screen(pscreen); + + if (lima_dump_command_stream) { + fclose(lima_dump_command_stream); + lima_dump_command_stream = NULL; + } + + slab_destroy_parent(&screen->transfer_pool); + + if (screen->ro) + free(screen->ro); + + if (screen->gp_buffer) + lima_bo_free(screen->gp_buffer); + + if (screen->pp_buffer) + lima_bo_free(screen->pp_buffer); + + lima_bo_table_fini(screen); + ralloc_free(screen); +} + +static const char * +lima_screen_get_name(struct pipe_screen *pscreen) +{ + struct lima_screen *screen = lima_screen(pscreen); + + switch (screen->gpu_type) { + case DRM_LIMA_PARAM_GPU_ID_MALI400: + return "Mali400"; + case DRM_LIMA_PARAM_GPU_ID_MALI450: + return "Mali450"; + } + + return NULL; +} + +static const char * +lima_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "lima"; +} + +static const char * +lima_screen_get_device_vendor(struct pipe_screen *pscreen) +{ + return "ARM"; +} + +static int +lima_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_ACCELERATED: + case PIPE_CAP_UMA: + case PIPE_CAP_NATIVE_FENCE_FD: + return 1; + + /* Unimplemented, but for exporting OpenGL 2.0 */ + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_POINT_SPRITE: + return 1; + + /* not clear supported */ + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return LIMA_MAX_MIP_LEVELS; + + case PIPE_CAP_VENDOR_ID: + return 0x13B5; + + case PIPE_CAP_VIDEO_MEMORY: + return 0; + + case PIPE_CAP_PCI_GROUP: + case PIPE_CAP_PCI_BUS: + case PIPE_CAP_PCI_DEVICE: + case PIPE_CAP_PCI_FUNCTION: + return 0; + + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 0; + + default: + return u_pipe_screen_get_param_defaults(pscreen, param); + } +} + +static float +lima_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 255.0f; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + + default: + return 0.0f; + } +} + +static int +get_vertex_shader_param(struct lima_screen *screen, + enum pipe_shader_cap param) +{ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case 
PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; /* need investigate */ + + case PIPE_SHADER_CAP_MAX_INPUTS: + return 16; /* attributes */ + + case PIPE_SHADER_CAP_MAX_OUTPUTS: + return LIMA_MAX_VARYING_NUM; /* varying */ + + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 4096; /* need investigate */ + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 1; + + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_NIR; + + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* need investigate */ + + default: + return 0; + } +} + +static int +get_fragment_shader_param(struct lima_screen *screen, + enum pipe_shader_cap param) +{ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; /* need investigate */ + + case PIPE_SHADER_CAP_MAX_INPUTS: + return LIMA_MAX_VARYING_NUM - 1; /* varying, minus gl_Position */ + + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 4096; /* need investigate */ + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 1; + + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + return 16; /* need investigate */ + + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_NIR; + + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* need investigate */ + + default: + return 0; + } +} + +static int +lima_screen_get_shader_param(struct pipe_screen *pscreen, + enum pipe_shader_type shader, + enum pipe_shader_cap param) +{ + struct lima_screen *screen = lima_screen(pscreen); + + switch (shader) { + case PIPE_SHADER_FRAGMENT: + return get_fragment_shader_param(screen, param); + case PIPE_SHADER_VERTEX: + return get_vertex_shader_param(screen, param); + + default: + return 0; + } +} + +static boolean +lima_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned storage_sample_count, + unsigned usage) +{ + switch (target) { + case PIPE_BUFFER: + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + break; + default: + return FALSE; + } + + if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) + return false; + + /* be able to support 16, now limit to 4 */ + if (sample_count > 1 && sample_count != 4) + return FALSE; + + if (usage & PIPE_BIND_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + break; + default: + return FALSE; + } + } + + if (usage & PIPE_BIND_DEPTH_STENCIL) { + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_Z24X8_UNORM: + break; + default: + return FALSE; + } + } + + if (usage & PIPE_BIND_VERTEX_BUFFER) { + switch (format) { + case PIPE_FORMAT_R32G32B32_FLOAT: + break; + default: + return FALSE; + } + } + + if (usage & PIPE_BIND_INDEX_BUFFER) { + switch (format) { + case PIPE_FORMAT_I8_UINT: + case PIPE_FORMAT_I16_UINT: + case PIPE_FORMAT_I32_UINT: + break; + default: + return FALSE; + } + } + + if (usage & PIPE_BIND_SAMPLER_VIEW) { + switch (format) { + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_B8G8R8A8_SRGB: + break; + default: + return FALSE; + } + } + + return TRUE; +} + +static const void * +lima_screen_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, + 
enum pipe_shader_type shader) +{ + return lima_program_get_compiler_options(shader); +} + +static bool +lima_screen_query_info(struct lima_screen *screen) +{ + struct drm_lima_get_param param; + + memset(¶m, 0, sizeof(param)); + param.param = DRM_LIMA_PARAM_GPU_ID; + if (drmIoctl(screen->fd, DRM_IOCTL_LIMA_GET_PARAM, ¶m)) + return false; + + switch (param.value) { + case DRM_LIMA_PARAM_GPU_ID_MALI400: + case DRM_LIMA_PARAM_GPU_ID_MALI450: + screen->gpu_type = param.value; + break; + default: + return false; + } + + memset(¶m, 0, sizeof(param)); + param.param = DRM_LIMA_PARAM_NUM_PP; + if (drmIoctl(screen->fd, DRM_IOCTL_LIMA_GET_PARAM, ¶m)) + return false; + + screen->num_pp = param.value; + + return true; +} + +static void +lima_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen, + enum pipe_format format, int max, + uint64_t *modifiers, + unsigned int *external_only, + int *count) +{ + uint64_t available_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + }; + + if (!modifiers) { + *count = ARRAY_SIZE(available_modifiers); + return; + } + + for (int i = 0; i < *count; i++) { + modifiers[i] = available_modifiers[i]; + if (external_only) + external_only = false; + } +} + +static const struct debug_named_value debug_options[] = { + { "gp", LIMA_DEBUG_GP, + "print GP shader compiler result of each stage" }, + { "pp", LIMA_DEBUG_PP, + "print PP shader compiler result of each stage" }, + { "dump", LIMA_DEBUG_DUMP, + "dump GPU command stream to $PWD/lima.dump" }, + { NULL } +}; + +DEBUG_GET_ONCE_FLAGS_OPTION(lima_debug, "LIMA_DEBUG", debug_options, 0) +uint32_t lima_debug; + +static void +lima_screen_parse_env(void) +{ + lima_debug = debug_get_option_lima_debug(); + + if (lima_debug & LIMA_DEBUG_DUMP) { + const char *dump_command = "lima.dump"; + printf("lima: dump command stream to file %s\n", dump_command); + lima_dump_command_stream = fopen(dump_command, "w"); + if (!lima_dump_command_stream) + fprintf(stderr, "lima: fail to open command stream log file %s\n", + dump_command); + } + + lima_ctx_num_plb = debug_get_num_option("LIMA_CTX_NUM_PLB", LIMA_CTX_PLB_DEF_NUM); + if (lima_ctx_num_plb > LIMA_CTX_PLB_MAX_NUM || + lima_ctx_num_plb < LIMA_CTX_PLB_MIN_NUM) { + fprintf(stderr, "lima: LIMA_CTX_NUM_PLB %d out of range [%d %d], " + "reset to default %d\n", lima_ctx_num_plb, LIMA_CTX_PLB_MIN_NUM, + LIMA_CTX_PLB_MAX_NUM, LIMA_CTX_PLB_DEF_NUM); + lima_ctx_num_plb = LIMA_CTX_PLB_DEF_NUM; + } + + lima_ppir_force_spilling = debug_get_num_option("LIMA_PPIR_FORCE_SPILLING", 0); + if (lima_ppir_force_spilling < 0) { + fprintf(stderr, "lima: LIMA_PPIR_FORCE_SPILLING %d less than 0, " + "reset to default 0\n", lima_ppir_force_spilling); + lima_ppir_force_spilling = 0; + } +} + +struct pipe_screen * +lima_screen_create(int fd, struct renderonly *ro) +{ + struct lima_screen *screen; + + screen = rzalloc(NULL, struct lima_screen); + if (!screen) + return NULL; + + screen->fd = fd; + + if (!lima_screen_query_info(screen)) + goto err_out0; + + if (!lima_bo_table_init(screen)) + goto err_out0; + + screen->pp_ra = ppir_regalloc_init(screen); + if (!screen->pp_ra) + goto err_out1; + + screen->gp_buffer = lima_bo_create(screen, gp_buffer_size, 0); + if (!screen->gp_buffer) + goto err_out1; + + screen->pp_buffer = lima_bo_create(screen, pp_buffer_size, 0); + if (!screen->pp_buffer) + goto err_out2; + + /* fs program for clear buffer? 
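+ * The raw words below are copied into pp_buffer at pp_clear_program_offset
+ * and referenced by the frame render state (pp_frame_rsw[9]) set up further
+ * down in this function. Disassembly: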
+ * const0 1 0 0 -1.67773, mov.v0 $0 ^const0.xxxx, stop + */ + static const uint32_t pp_clear_program[] = { + 0x00020425, 0x0000000c, 0x01e007cf, 0xb0000000, + 0x000005f5, 0x00000000, 0x00000000, 0x00000000, + }; + memcpy(lima_bo_map(screen->pp_buffer) + pp_clear_program_offset, + pp_clear_program, sizeof(pp_clear_program)); + + /* copy texture to framebuffer, used to reload gpu tile buffer + * load.v $1 0.xy, texld_2d 0, mov.v0 $0 ^tex_sampler, sync, stop + */ + static const uint32_t pp_reload_program[] = { + 0x000005e6, 0xf1003c20, 0x00000000, 0x39001000, + 0x00000e4e, 0x000007cf, 0x00000000, 0x00000000, + }; + memcpy(lima_bo_map(screen->pp_buffer) + pp_reload_program_offset, + pp_reload_program, sizeof(pp_reload_program)); + + /* 0/1/2 vertex index for reload/clear draw */ + static const uint8_t pp_shared_index[] = { 0, 1, 2 }; + memcpy(lima_bo_map(screen->pp_buffer) + pp_shared_index_offset, + pp_shared_index, sizeof(pp_shared_index)); + + /* 4096x4096 gl pos used for partial clear */ + static const float pp_clear_gl_pos[] = { + 4096, 0, 1, 1, + 0, 0, 1, 1, + 0, 4096, 1, 1, + }; + memcpy(lima_bo_map(screen->pp_buffer) + pp_clear_gl_pos_offset, + pp_clear_gl_pos, sizeof(pp_clear_gl_pos)); + + /* is pp frame render state static? */ + uint32_t *pp_frame_rsw = lima_bo_map(screen->pp_buffer) + pp_frame_rsw_offset; + memset(pp_frame_rsw, 0, 0x40); + pp_frame_rsw[8] = 0x0000f008; + pp_frame_rsw[9] = screen->pp_buffer->va + pp_clear_program_offset; + pp_frame_rsw[13] = 0x00000100; + + if (ro) { + screen->ro = renderonly_dup(ro); + if (!screen->ro) { + fprintf(stderr, "Failed to dup renderonly object\n"); + goto err_out3; + } + } + + screen->base.destroy = lima_screen_destroy; + screen->base.get_name = lima_screen_get_name; + screen->base.get_vendor = lima_screen_get_vendor; + screen->base.get_device_vendor = lima_screen_get_device_vendor; + screen->base.get_param = lima_screen_get_param; + screen->base.get_paramf = lima_screen_get_paramf; + screen->base.get_shader_param = lima_screen_get_shader_param; + screen->base.context_create = lima_context_create; + screen->base.is_format_supported = lima_screen_is_format_supported; + screen->base.get_compiler_options = lima_screen_get_compiler_options; + screen->base.query_dmabuf_modifiers = lima_screen_query_dmabuf_modifiers; + + lima_resource_screen_init(screen); + lima_fence_screen_init(screen); + + slab_create_parent(&screen->transfer_pool, sizeof(struct lima_transfer), 16); + + screen->refcnt = 1; + + lima_screen_parse_env(); + + return &screen->base; + +err_out3: + lima_bo_free(screen->pp_buffer); +err_out2: + lima_bo_free(screen->gp_buffer); +err_out1: + lima_bo_table_fini(screen); +err_out0: + ralloc_free(screen); + return NULL; +} diff --git a/src/gallium/drivers/lima/lima_screen.h b/src/gallium/drivers/lima/lima_screen.h new file mode 100644 index 00000000000..31797729c1f --- /dev/null +++ b/src/gallium/drivers/lima/lima_screen.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) 
shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef H_LIMA_SCREEN +#define H_LIMA_SCREEN + +#include + +#include "util/slab.h" +#include "util/list.h" +#include "os/os_thread.h" + +#include "pipe/p_screen.h" + +#define LIMA_DEBUG_GP (1 << 0) +#define LIMA_DEBUG_PP (1 << 1) +#define LIMA_DEBUG_DUMP (1 << 2) + +extern uint32_t lima_debug; +extern FILE *lima_dump_command_stream; +extern int lima_ctx_num_plb; +extern int lima_ppir_force_spilling; + +struct ra_regs; + +struct lima_screen { + struct pipe_screen base; + struct renderonly *ro; + + int refcnt; + void *winsys_priv; + + int fd; + int gpu_type; + int num_pp; + + /* bo table */ + mtx_t bo_table_lock; + struct util_hash_table *bo_handles; + struct util_hash_table *bo_flink_names; + + struct slab_parent_pool transfer_pool; + + struct ra_regs *pp_ra; + + struct lima_bo *gp_buffer; + #define gp_tile_heap_offset 0x000000 + #define gp_buffer_size 0x100000 + + struct lima_bo *pp_buffer; + #define pp_frame_rsw_offset 0x0000 + #define pp_clear_program_offset 0x0040 + #define pp_reload_program_offset 0x0080 + #define pp_shared_index_offset 0x00c0 + #define pp_clear_gl_pos_offset 0x0100 + #define pp_stack_offset 0x1000 + #define pp_stack_pp_size 0x400 /* per pp, up to 8 pp */ + #define pp_stack_offset_end 0x3000 + #define pp_buffer_size 0x3000 + +}; + +static inline struct lima_screen * +lima_screen(struct pipe_screen *pscreen) +{ + return (struct lima_screen *)pscreen; +} + +struct pipe_screen * +lima_screen_create(int fd, struct renderonly *ro); + +#endif diff --git a/src/gallium/drivers/lima/lima_state.c b/src/gallium/drivers/lima/lima_state.c new file mode 100644 index 00000000000..3691dd9b58d --- /dev/null +++ b/src/gallium/drivers/lima/lima_state.c @@ -0,0 +1,506 @@ +/* + * Copyright (c) 2011-2013 Luc Verhaegen + * Copyright (c) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_helpers.h" +#include "util/u_debug.h" + +#include "pipe/p_state.h" + +#include "lima_screen.h" +#include "lima_context.h" +#include "lima_resource.h" + +static void +lima_set_framebuffer_state(struct pipe_context *pctx, + const struct pipe_framebuffer_state *framebuffer) +{ + struct lima_context *ctx = lima_context(pctx); + + /* submit need framebuffer info, flush before change it */ + lima_flush(ctx); + + struct lima_context_framebuffer *fb = &ctx->framebuffer; + + fb->samples = framebuffer->samples; + + pipe_surface_reference(&fb->cbuf, framebuffer->cbufs[0]); + pipe_surface_reference(&fb->zsbuf, framebuffer->zsbuf); + + /* need align here? */ + fb->width = framebuffer->width; + fb->height = framebuffer->height; + + int width = align(framebuffer->width, 16) >> 4; + int height = align(framebuffer->height, 16) >> 4; + if (fb->tiled_w != width || fb->tiled_h != height) { + fb->tiled_w = width; + fb->tiled_h = height; + + fb->shift_h = 0; + fb->shift_w = 0; + + int limit = ctx->plb_max_blk; + while ((width * height) > limit) { + if (width >= height) { + width = (width + 1) >> 1; + fb->shift_w++; + } else { + height = (height + 1) >> 1; + fb->shift_h++; + } + } + + fb->block_w = width; + fb->block_h = height; + + fb->shift_min = MIN3(fb->shift_w, fb->shift_h, 2); + + debug_printf("fb dim change tiled=%d/%d block=%d/%d shift=%d/%d/%d\n", + fb->tiled_w, fb->tiled_h, fb->block_w, fb->block_h, + fb->shift_w, fb->shift_h, fb->shift_min); + } + + ctx->dirty |= LIMA_CONTEXT_DIRTY_FRAMEBUFFER; +} + +static void +lima_set_polygon_stipple(struct pipe_context *pctx, + const struct pipe_poly_stipple *stipple) +{ + +} + +static void * +lima_create_depth_stencil_alpha_state(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct lima_depth_stencil_alpha_state *so; + + so = CALLOC_STRUCT(lima_depth_stencil_alpha_state); + if (!so) + return NULL; + + so->base = *cso; + + return so; +} + +static void +lima_bind_depth_stencil_alpha_state(struct pipe_context *pctx, void *hwcso) +{ + struct lima_context *ctx = lima_context(pctx); + + ctx->zsa = hwcso; + ctx->dirty |= LIMA_CONTEXT_DIRTY_ZSA; +} + +static void +lima_delete_depth_stencil_alpha_state(struct pipe_context *pctx, void *hwcso) +{ + FREE(hwcso); +} + +static void * +lima_create_rasterizer_state(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso) +{ + struct lima_rasterizer_state *so; + + so = CALLOC_STRUCT(lima_rasterizer_state); + if (!so) + return NULL; + + so->base = *cso; + + return so; +} + +static void +lima_bind_rasterizer_state(struct pipe_context *pctx, void *hwcso) +{ + struct lima_context *ctx = lima_context(pctx); + + ctx->rasterizer = hwcso; + ctx->dirty |= LIMA_CONTEXT_DIRTY_RASTERIZER; +} + +static void +lima_delete_rasterizer_state(struct pipe_context *pctx, void *hwcso) +{ + FREE(hwcso); +} + +static void * +lima_create_blend_state(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + struct lima_blend_state *so; + + so = CALLOC_STRUCT(lima_blend_state); + if (!so) + return NULL; + + so->base = *cso; + + return so; +} + +static void +lima_bind_blend_state(struct pipe_context *pctx, void *hwcso) +{ + struct lima_context *ctx = lima_context(pctx); + + ctx->blend = hwcso; + ctx->dirty |= LIMA_CONTEXT_DIRTY_BLEND; +} + +static void +lima_delete_blend_state(struct pipe_context *pctx, void *hwcso) +{ + FREE(hwcso); +} + +static void * +lima_create_vertex_elements_state(struct pipe_context 
*pctx, unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct lima_vertex_element_state *so; + + so = CALLOC_STRUCT(lima_vertex_element_state); + if (!so) + return NULL; + + memcpy(so->pipe, elements, sizeof(*elements) * num_elements); + so->num_elements = num_elements; + + return so; +} + +static void +lima_bind_vertex_elements_state(struct pipe_context *pctx, void *hwcso) +{ + struct lima_context *ctx = lima_context(pctx); + + ctx->vertex_elements = hwcso; + ctx->dirty |= LIMA_CONTEXT_DIRTY_VERTEX_ELEM; +} + +static void +lima_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso) +{ + FREE(hwcso); +} + +static void +lima_set_vertex_buffers(struct pipe_context *pctx, + unsigned start_slot, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct lima_context *ctx = lima_context(pctx); + struct lima_context_vertex_buffer *so = &ctx->vertex_buffers; + + util_set_vertex_buffers_mask(so->vb + start_slot, &so->enabled_mask, + vb, start_slot, count); + so->count = util_last_bit(so->enabled_mask); + + ctx->dirty |= LIMA_CONTEXT_DIRTY_VERTEX_BUFF; +} + +static void +lima_set_viewport_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *viewport) +{ + struct lima_context *ctx = lima_context(pctx); + + /* reverse calculate the parameter of glViewport */ + ctx->viewport.x = viewport->translate[0] - viewport->scale[0]; + ctx->viewport.y = fabsf(viewport->translate[1] - fabsf(viewport->scale[1])); + ctx->viewport.width = viewport->scale[0] * 2; + ctx->viewport.height = fabsf(viewport->scale[1] * 2); + + /* reverse calculate the parameter of glDepthRange */ + ctx->viewport.near = viewport->translate[2] - viewport->scale[2]; + ctx->viewport.far = viewport->translate[2] + viewport->scale[2]; + + ctx->viewport.transform = *viewport; + ctx->dirty |= LIMA_CONTEXT_DIRTY_VIEWPORT; +} + +static void +lima_set_scissor_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissor) +{ + struct lima_context *ctx = lima_context(pctx); + + ctx->scissor = *scissor; + ctx->dirty |= LIMA_CONTEXT_DIRTY_SCISSOR; +} + +static void +lima_set_blend_color(struct pipe_context *pctx, + const struct pipe_blend_color *blend_color) +{ + struct lima_context *ctx = lima_context(pctx); + + ctx->blend_color = *blend_color; + ctx->dirty |= LIMA_CONTEXT_DIRTY_BLEND_COLOR; +} + +static void +lima_set_stencil_ref(struct pipe_context *pctx, + const struct pipe_stencil_ref *stencil_ref) +{ + struct lima_context *ctx = lima_context(pctx); + + ctx->stencil_ref = *stencil_ref; + ctx->dirty |= LIMA_CONTEXT_DIRTY_STENCIL_REF; +} + +static void +lima_set_constant_buffer(struct pipe_context *pctx, + enum pipe_shader_type shader, uint index, + const struct pipe_constant_buffer *cb) +{ + struct lima_context *ctx = lima_context(pctx); + struct lima_context_constant_buffer *so = ctx->const_buffer + shader; + + assert(index == 0); + + if (unlikely(!cb)) { + so->buffer = NULL; + so->size = 0; + } else { + assert(!cb->buffer); + + so->buffer = cb->user_buffer + cb->buffer_offset; + so->size = cb->buffer_size; + } + + so->dirty = true; + ctx->dirty |= LIMA_CONTEXT_DIRTY_CONST_BUFF; + +} + +static void * +lima_create_sampler_state(struct pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + struct lima_sampler_state *so = CALLOC_STRUCT(lima_sampler_state); + if (!so) + return NULL; + + memcpy(so, cso, sizeof(*cso)); + + return so; +} + +static void 
+lima_sampler_state_delete(struct pipe_context *pctx, void *sstate) +{ + free(sstate); +} + +static void +lima_sampler_states_bind(struct pipe_context *pctx, + enum pipe_shader_type shader, unsigned start, + unsigned nr, void **hwcso) +{ + struct lima_context *ctx = lima_context(pctx); + struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj; + unsigned i; + unsigned new_nr = 0; + + assert(start == 0); + + for (i = 0; i < nr; i++) { + if (hwcso[i]) + new_nr = i + 1; + lima_tex->samplers[i] = hwcso[i]; + } + + for (; i < lima_tex->num_samplers; i++) { + lima_tex->samplers[i] = NULL; + } + + lima_tex->num_samplers = new_nr; + ctx->dirty |= LIMA_CONTEXT_DIRTY_TEXTURES; +} + +static struct pipe_sampler_view * +lima_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, + const struct pipe_sampler_view *cso) +{ + struct lima_sampler_view *so = CALLOC_STRUCT(lima_sampler_view); + + if (!so) + return NULL; + + so->base = *cso; + + pipe_reference(NULL, &prsc->reference); + so->base.texture = prsc; + so->base.reference.count = 1; + so->base.context = pctx; + + return &so->base; +} + +static void +lima_sampler_view_destroy(struct pipe_context *pctx, + struct pipe_sampler_view *pview) +{ + struct lima_sampler_view *view = lima_sampler_view(pview); + + pipe_resource_reference(&pview->texture, NULL); + + free(view); +} + +static void +lima_set_sampler_views(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + struct lima_context *ctx = lima_context(pctx); + struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj; + int i; + unsigned new_nr = 0; + + assert(start == 0); + + for (i = 0; i < nr; i++) { + if (views[i]) + new_nr = i + 1; + pipe_sampler_view_reference(&lima_tex->textures[i], views[i]); + } + + for (; i < lima_tex->num_textures; i++) { + pipe_sampler_view_reference(&lima_tex->textures[i], NULL); + } + + lima_tex->num_textures = new_nr; + ctx->dirty |= LIMA_CONTEXT_DIRTY_TEXTURES; +} + +static boolean +lima_set_damage_region(struct pipe_context *pctx, unsigned num_rects, int *rects) +{ + struct lima_context *ctx = lima_context(pctx); + struct lima_damage_state *damage = &ctx->damage; + int i; + + if (damage->region) + ralloc_free(damage->region); + + if (!num_rects) { + damage->region = NULL; + damage->num_region = 0; + return true; + } + + damage->region = ralloc_size(ctx, sizeof(*damage->region) * num_rects); + if (!damage->region) { + damage->num_region = 0; + return false; + } + + for (i = 0; i < num_rects; i++) { + struct pipe_scissor_state *r = damage->region + i; + /* region in tile unit */ + r->minx = rects[i * 4] >> 4; + r->miny = rects[i * 4 + 1] >> 4; + r->maxx = (rects[i * 4] + rects[i * 4 + 2] + 0xf) >> 4; + r->maxy = (rects[i * 4 + 1] + rects[i * 4 + 3] + 0xf) >> 4; + } + + /* is region aligned to tiles? 
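+    * Tiles are 16x16 pixels, so the region is tile aligned only if every
+    * rect coordinate and size has its low 4 bits clear.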
*/ + damage->aligned = true; + for (i = 0; i < num_rects * 4; i++) { + if (rects[i] & 0xf) { + damage->aligned = false; + break; + } + } + + damage->num_region = num_rects; + return true; +} + +void +lima_state_init(struct lima_context *ctx) +{ + ctx->base.set_framebuffer_state = lima_set_framebuffer_state; + ctx->base.set_polygon_stipple = lima_set_polygon_stipple; + ctx->base.set_viewport_states = lima_set_viewport_states; + ctx->base.set_scissor_states = lima_set_scissor_states; + ctx->base.set_blend_color = lima_set_blend_color; + ctx->base.set_stencil_ref = lima_set_stencil_ref; + + ctx->base.set_vertex_buffers = lima_set_vertex_buffers; + ctx->base.set_constant_buffer = lima_set_constant_buffer; + + ctx->base.create_depth_stencil_alpha_state = lima_create_depth_stencil_alpha_state; + ctx->base.bind_depth_stencil_alpha_state = lima_bind_depth_stencil_alpha_state; + ctx->base.delete_depth_stencil_alpha_state = lima_delete_depth_stencil_alpha_state; + + ctx->base.create_rasterizer_state = lima_create_rasterizer_state; + ctx->base.bind_rasterizer_state = lima_bind_rasterizer_state; + ctx->base.delete_rasterizer_state = lima_delete_rasterizer_state; + + ctx->base.create_blend_state = lima_create_blend_state; + ctx->base.bind_blend_state = lima_bind_blend_state; + ctx->base.delete_blend_state = lima_delete_blend_state; + + ctx->base.create_vertex_elements_state = lima_create_vertex_elements_state; + ctx->base.bind_vertex_elements_state = lima_bind_vertex_elements_state; + ctx->base.delete_vertex_elements_state = lima_delete_vertex_elements_state; + + ctx->base.create_sampler_state = lima_create_sampler_state; + ctx->base.delete_sampler_state = lima_sampler_state_delete; + ctx->base.bind_sampler_states = lima_sampler_states_bind; + + ctx->base.create_sampler_view = lima_create_sampler_view; + ctx->base.sampler_view_destroy = lima_sampler_view_destroy; + ctx->base.set_sampler_views = lima_set_sampler_views; +} + +void +lima_state_fini(struct lima_context *ctx) +{ + struct lima_context_vertex_buffer *so = &ctx->vertex_buffers; + + util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, NULL, + 0, ARRAY_SIZE(so->vb)); + + pipe_surface_reference(&ctx->framebuffer.cbuf, NULL); + pipe_surface_reference(&ctx->framebuffer.zsbuf, NULL); +} diff --git a/src/gallium/drivers/lima/lima_submit.c b/src/gallium/drivers/lima/lima_submit.c new file mode 100644 index 00000000000..83c78bf82da --- /dev/null +++ b/src/gallium/drivers/lima/lima_submit.c @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2017-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include + +#include "xf86drm.h" +#include "libsync.h" +#include "drm-uapi/lima_drm.h" + +#include "util/ralloc.h" +#include "util/u_dynarray.h" +#include "util/os_time.h" + +#include "lima_screen.h" +#include "lima_context.h" +#include "lima_submit.h" +#include "lima_bo.h" +#include "lima_util.h" + +struct lima_submit { + struct lima_screen *screen; + uint32_t pipe; + uint32_t ctx; + + int in_sync_fd; + uint32_t in_sync; + uint32_t out_sync; + + struct util_dynarray gem_bos; + struct util_dynarray bos; +}; + + +#define VOID2U64(x) ((uint64_t)(unsigned long)(x)) + +struct lima_submit *lima_submit_create(struct lima_context *ctx, uint32_t pipe) +{ + struct lima_submit *s; + + s = rzalloc(ctx, struct lima_submit); + if (!s) + return NULL; + + s->screen = lima_screen(ctx->base.screen); + s->pipe = pipe; + s->ctx = ctx->id; + s->in_sync_fd = -1; + + int err = drmSyncobjCreate(s->screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &s->out_sync); + if (err) + goto err_out0; + + err = drmSyncobjCreate(s->screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &s->in_sync); + if (err) + goto err_out1; + + util_dynarray_init(&s->gem_bos, s); + + return s; + +err_out1: + drmSyncobjDestroy(s->screen->fd, s->out_sync); +err_out0: + ralloc_free(s); + return NULL; +} + +void lima_submit_free(struct lima_submit *submit) +{ + if (submit->in_sync_fd >= 0) + close(submit->in_sync_fd); + drmSyncobjDestroy(submit->screen->fd, submit->in_sync); + drmSyncobjDestroy(submit->screen->fd, submit->out_sync); +} + +bool lima_submit_add_bo(struct lima_submit *submit, struct lima_bo *bo, uint32_t flags) +{ + util_dynarray_foreach(&submit->gem_bos, struct drm_lima_gem_submit_bo, gem_bo) { + if (bo->handle == gem_bo->handle) { + gem_bo->flags |= flags; + return true; + } + } + + struct drm_lima_gem_submit_bo *submit_bo = + util_dynarray_grow(&submit->gem_bos, sizeof(*submit_bo)); + submit_bo->handle = bo->handle; + submit_bo->flags = flags; + + struct lima_bo **jbo = util_dynarray_grow(&submit->bos, sizeof(*jbo)); + *jbo = bo; + + /* prevent bo from being freed when submit start */ + lima_bo_reference(bo); + + return true; +} + +bool lima_submit_start(struct lima_submit *submit, void *frame, uint32_t size) +{ + struct drm_lima_gem_submit req = { + .ctx = submit->ctx, + .pipe = submit->pipe, + .nr_bos = submit->gem_bos.size / sizeof(struct drm_lima_gem_submit_bo), + .bos = VOID2U64(util_dynarray_begin(&submit->gem_bos)), + .frame = VOID2U64(frame), + .frame_size = size, + }; + + if (submit->in_sync_fd >= 0) { + int err = drmSyncobjImportSyncFile(submit->screen->fd, submit->in_sync, + submit->in_sync_fd); + if (err) + return false; + + req.in_sync[0] = submit->in_sync; + close(submit->in_sync_fd); + submit->in_sync_fd = -1; + } + + bool ret = drmIoctl(submit->screen->fd, DRM_IOCTL_LIMA_GEM_SUBMIT, &req) == 0; + + util_dynarray_foreach(&submit->bos, struct lima_bo *, bo) { + lima_bo_free(*bo); + } + + util_dynarray_clear(&submit->gem_bos); + util_dynarray_clear(&submit->bos); + return ret; +} + +bool lima_submit_wait(struct lima_submit *submit, uint64_t timeout_ns) +{ + int64_t abs_timeout = os_time_get_absolute_timeout(timeout_ns); + + return !drmSyncobjWait(submit->screen->fd, &submit->out_sync, 1, abs_timeout, 0, NULL); +} + +bool 
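+/* Check whether bo is already attached to this submit. With all == true any
+ * attachment counts; with all == false only a LIMA_SUBMIT_BO_WRITE one does. */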
lima_submit_has_bo(struct lima_submit *submit, struct lima_bo *bo, bool all) +{ + util_dynarray_foreach(&submit->gem_bos, struct drm_lima_gem_submit_bo, gem_bo) { + if (bo->handle == gem_bo->handle) { + if (all) + return true; + else + return gem_bo->flags & LIMA_SUBMIT_BO_WRITE; + } + } + + return false; +} + +bool lima_submit_add_in_sync(struct lima_submit *submit, int fd) +{ + return !sync_accumulate("lima", &submit->in_sync_fd, fd); +} + +bool lima_submit_get_out_sync(struct lima_submit *submit, int *fd) +{ + return !drmSyncobjExportSyncFile(submit->screen->fd, submit->out_sync, fd); +} diff --git a/src/gallium/drivers/lima/lima_submit.h b/src/gallium/drivers/lima/lima_submit.h new file mode 100644 index 00000000000..216e0fee83d --- /dev/null +++ b/src/gallium/drivers/lima/lima_submit.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2018-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef H_LIMA_SUBMIT +#define H_LIMA_SUBMIT + +#include +#include + +struct lima_context; +struct lima_submit; +struct lima_bo; + +struct lima_submit *lima_submit_create(struct lima_context *ctx, uint32_t pipe); +void lima_submit_free(struct lima_submit *submit); +bool lima_submit_add_bo(struct lima_submit *submit, struct lima_bo *bo, uint32_t flags); +bool lima_submit_start(struct lima_submit *submit, void *frame, uint32_t size); +bool lima_submit_wait(struct lima_submit *submit, uint64_t timeout_ns); +bool lima_submit_has_bo(struct lima_submit *submit, struct lima_bo *bo, bool all); +bool lima_submit_add_in_sync(struct lima_submit *submit, int fd); +bool lima_submit_get_out_sync(struct lima_submit *submit, int *fd); + +#endif diff --git a/src/gallium/drivers/lima/lima_texture.c b/src/gallium/drivers/lima/lima_texture.c new file mode 100644 index 00000000000..548d9839ff0 --- /dev/null +++ b/src/gallium/drivers/lima/lima_texture.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2011-2013 Luc Verhaegen + * Copyright (c) 2018-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" +#include "util/u_math.h" +#include "util/u_debug.h" +#include "util/u_transfer.h" + +#include "lima_bo.h" +#include "lima_context.h" +#include "lima_screen.h" +#include "lima_texture.h" +#include "lima_resource.h" +#include "lima_submit.h" +#include "lima_util.h" + +#include + +#define LIMA_TEXEL_FORMAT_BGR_565 0x0e +#define LIMA_TEXEL_FORMAT_RGB_888 0x15 +#define LIMA_TEXEL_FORMAT_RGBA_8888 0x16 +#define LIMA_TEXEL_FORMAT_RGBX_8888 0x17 + +#define lima_tex_list_size 64 + +static uint32_t pipe_format_to_lima(enum pipe_format pformat) +{ + unsigned swap_chans = 0, flag1 = 0, format; + + switch (pformat) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + swap_chans = 1; + case PIPE_FORMAT_B8G8R8A8_UNORM: + format = LIMA_TEXEL_FORMAT_RGBA_8888; + break; + case PIPE_FORMAT_R8G8B8X8_UNORM: + swap_chans = 1; + case PIPE_FORMAT_B8G8R8X8_UNORM: + format = LIMA_TEXEL_FORMAT_RGBX_8888; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + swap_chans = 1; + format = LIMA_TEXEL_FORMAT_RGB_888; + break; + case PIPE_FORMAT_B5G6R5_UNORM: + format = LIMA_TEXEL_FORMAT_BGR_565; + break; + default: + assert(0); + break; + } + + return (swap_chans << 7) | (flag1 << 6) | format; +} + +void +lima_texture_desc_set_res(struct lima_context *ctx, uint32_t *desc, + struct pipe_resource *prsc, + unsigned first_level, unsigned last_level) +{ + unsigned width, height, layout, i; + struct lima_resource *lima_res = lima_resource(prsc); + + width = prsc->width0; + height = prsc->height0; + if (first_level != 0) { + width = u_minify(width, first_level); + height = u_minify(height, first_level); + } + + desc[0] |= pipe_format_to_lima(prsc->format); + desc[2] |= (width << 22); + desc[3] |= 0x10000 | (height << 3) | (width >> 10); + + if (lima_res->tiled) + layout = 3; + else { + /* for padded linear texture */ + if (lima_res->levels[first_level].width != width) { + desc[0] |= lima_res->levels[first_level].width << 18; + desc[2] |= 0x100; + } + layout = 0; + } + + lima_submit_add_bo(ctx->pp_submit, lima_res->bo, LIMA_SUBMIT_BO_READ); + + uint32_t base_va = lima_res->bo->va; + + /* attach level 0 */ + desc[6] |= (base_va << 24) | (layout << 13); + desc[7] |= base_va >> 8; + + /* Attach remaining levels. + * Each subsequent mipmap address is specified using the 26 msbs. 
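+ * For example, level 1 (its address shifted right by 6) starts at bit 24 of
+ * desc[7]: the low 8 bits land there and the upper 18 bits spill into
+ * desc[8], so level 2 then starts at bit 18 of desc[8], and so on every
+ * 26 bits.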
+ * These addresses are then packed continuously in memory */ + unsigned current_desc_index = 7; + unsigned current_desc_bit_index = 24; + for (i = 1; i < LIMA_MAX_MIP_LEVELS; i++) { + if (first_level + i > last_level) + break; + + uint32_t address = base_va + lima_res->levels[i].offset; + address = (address >> 6); + desc[current_desc_index] |= (address << current_desc_bit_index); + if (current_desc_bit_index <= 6) { + current_desc_bit_index += 26; + if (current_desc_bit_index >= 32) { + current_desc_bit_index &= 0x1F; + current_desc_index++; + } + continue; + } + desc[current_desc_index + 1] |= (address >> (32 - current_desc_bit_index)); + current_desc_bit_index = (current_desc_bit_index + 26) & 0x1F; + current_desc_index++; + } +} + +static void +lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sampler, + struct lima_sampler_view *texture, void *pdesc) +{ + uint32_t *desc = pdesc; + unsigned first_level; + unsigned last_level; + bool mipmapping; + + memset(desc, 0, lima_tex_desc_size); + + /* 2D texture */ + desc[1] |= 0x400; + + desc[1] &= ~0xff000000; + switch (sampler->base.min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + first_level = texture->base.u.tex.first_level; + last_level = texture->base.u.tex.last_level; + if (last_level - first_level >= LIMA_MAX_MIP_LEVELS) + last_level = first_level + LIMA_MAX_MIP_LEVELS - 1; + mipmapping = true; + desc[1] |= ((last_level - first_level) << 24); + desc[2] &= ~0x0600; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + first_level = texture->base.u.tex.first_level; + last_level = texture->base.u.tex.last_level; + if (last_level - first_level >= LIMA_MAX_MIP_LEVELS) + last_level = first_level + LIMA_MAX_MIP_LEVELS - 1; + mipmapping = true; + desc[1] |= ((last_level - first_level) << 24); + desc[2] |= 0x0600; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + first_level = 0; + last_level = 0; + mipmapping = false; + desc[2] &= ~0x0600; + break; + } + + switch (sampler->base.mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + desc[2] &= ~0x1000; + /* no mipmap, filter_mag = linear */ + if (!mipmapping) + desc[1] |= 0x80000000; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + desc[2] |= 0x1000; + break; + } + + switch (sampler->base.min_img_filter) { + break; + case PIPE_TEX_FILTER_LINEAR: + desc[2] &= ~0x0800; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + desc[2] |= 0x0800; + break; + } + + /* Only clamp, clamp to edge, repeat and mirror repeat are supported */ + desc[2] &= ~0xe000; + switch (sampler->base.wrap_s) { + case PIPE_TEX_WRAP_CLAMP: + desc[2] |= 0x4000; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + desc[2] |= 0x2000; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + desc[2] |= 0x8000; + break; + case PIPE_TEX_WRAP_REPEAT: + default: + break; + } + + /* Only clamp, clamp to edge, repeat and mirror repeat are supported */ + desc[2] &= ~0x070000; + switch (sampler->base.wrap_t) { + case PIPE_TEX_WRAP_CLAMP: + desc[2] |= 0x020000; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + desc[2] |= 0x010000; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + desc[2] |= 0x040000; + break; + case PIPE_TEX_WRAP_REPEAT: + default: + break; + } + + lima_texture_desc_set_res(ctx, desc, texture->base.texture, + first_level, last_level); +} + +void +lima_update_textures(struct lima_context *ctx) +{ + struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj; + + assert (lima_tex->num_samplers <= 16); + + /* Nothing to do - we have no samplers or 
textures */ + if (!lima_tex->num_samplers || !lima_tex->num_textures) + return; + + unsigned size = lima_tex_list_size + lima_tex->num_samplers * lima_tex_desc_size; + uint32_t *descs = + lima_ctx_buff_alloc(ctx, lima_ctx_buff_pp_tex_desc, size, true); + + for (int i = 0; i < lima_tex->num_samplers; i++) { + off_t offset = lima_tex_desc_size * i + lima_tex_list_size; + struct lima_sampler_state *sampler = lima_sampler_state(lima_tex->samplers[i]); + struct lima_sampler_view *texture = lima_sampler_view(lima_tex->textures[i]); + + descs[i] = lima_ctx_buff_va(ctx, lima_ctx_buff_pp_tex_desc, + LIMA_CTX_BUFF_SUBMIT_PP) + offset; + lima_update_tex_desc(ctx, sampler, texture, (void *)descs + offset); + } + + lima_dump_command_stream_print( + descs, size, false, "add textures_desc at va %x\n", + lima_ctx_buff_va(ctx, lima_ctx_buff_pp_tex_desc, 0)); +} diff --git a/src/gallium/drivers/lima/lima_texture.h b/src/gallium/drivers/lima/lima_texture.h new file mode 100644 index 00000000000..bf7d735c168 --- /dev/null +++ b/src/gallium/drivers/lima/lima_texture.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2018-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef H_LIMA_TEXTURE +#define H_LIMA_TEXTURE + +#define lima_tex_desc_size 64 + +void lima_texture_desc_set_res(struct lima_context *ctx, uint32_t *desc, + struct pipe_resource *prsc, + unsigned first_level, unsigned last_level); +void lima_update_textures(struct lima_context *ctx); + +#endif diff --git a/src/gallium/drivers/lima/lima_tiling.c b/src/gallium/drivers/lima/lima_tiling.c new file mode 100644 index 00000000000..6332e47055d --- /dev/null +++ b/src/gallium/drivers/lima/lima_tiling.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2011-2013 Luc Verhaegen + * Copyright (c) 2018 Alyssa Rosenzweig + * Copyright (c) 2018 Vasily Khoruzhick + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "lima_tiling.h" + +uint32_t space_filler[16][16] = { + { 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85, }, + { 3, 2, 7, 6, 19, 18, 23, 22, 67, 66, 71, 70, 83, 82, 87, 86, }, + { 12, 13, 8, 9, 28, 29, 24, 25, 76, 77, 72, 73, 92, 93, 88, 89, }, + { 15, 14, 11, 10, 31, 30, 27, 26, 79, 78, 75, 74, 95, 94, 91, 90, }, + { 48, 49, 52, 53, 32, 33, 36, 37, 112, 113, 116, 117, 96, 97, 100, 101, }, + { 51, 50, 55, 54, 35, 34, 39, 38, 115, 114, 119, 118, 99, 98, 103, 102, }, + { 60, 61, 56, 57, 44, 45, 40, 41, 124, 125, 120, 121, 108, 109, 104, 105, }, + { 63, 62, 59, 58, 47, 46, 43, 42, 127, 126, 123, 122, 111, 110, 107, 106, }, + { 192, 193, 196, 197, 208, 209, 212, 213, 128, 129, 132, 133, 144, 145, 148, 149, }, + { 195, 194, 199, 198, 211, 210, 215, 214, 131, 130, 135, 134, 147, 146, 151, 150, }, + { 204, 205, 200, 201, 220, 221, 216, 217, 140, 141, 136, 137, 156, 157, 152, 153, }, + { 207, 206, 203, 202, 223, 222, 219, 218, 143, 142, 139, 138, 159, 158, 155, 154, }, + { 240, 241, 244, 245, 224, 225, 228, 229, 176, 177, 180, 181, 160, 161, 164, 165, }, + { 243, 242, 247, 246, 227, 226, 231, 230, 179, 178, 183, 182, 163, 162, 167, 166, }, + { 252, 253, 248, 249, 236, 237, 232, 233, 188, 189, 184, 185, 172, 173, 168, 169, }, + { 255, 254, 251, 250, 239, 238, 235, 234, 191, 190, 187, 186, 175, 174, 171, 170, }, +}; + +static void +lima_store_tiled_image_bpp4(void *dst, const void *src, + const struct pipe_box *box, + uint32_t dst_stride, + uint32_t src_stride) +{ + for (int y = box->y, src_y = 0; src_y < box->height; ++y, ++src_y) { + int block_y = y & ~0x0f; + int rem_y = y & 0x0F; + int block_start_s = block_y * dst_stride; + int source_start = src_y * src_stride; + + for (int x = box->x, src_x = 0; src_x < box->width; ++x, ++src_x) { + int block_x_s = (x >> 4) * 256; + int rem_x = x & 0x0F; + + int index = space_filler[rem_y][rem_x]; + const uint32_t *source = src + source_start + 4 * src_x; + uint32_t *dest = dst + block_start_s + 4 * (block_x_s + index); + + *dest = *source; + } + } +} + +static void +lima_store_tiled_image_generic(void *dst, const void *src, + const struct pipe_box *box, + uint32_t dst_stride, + uint32_t src_stride, + uint32_t bpp) +{ + for (int y = box->y, src_y = 0; src_y < box->height; ++y, ++src_y) { + int block_y = y & ~0x0f; + int rem_y = y & 0x0F; + int block_start_s = block_y * dst_stride; + int source_start = src_y * src_stride; + + for (int x = box->x, src_x = 0; src_x < box->width; ++x, ++src_x) { + int block_x_s = (x >> 4) * 256; + int rem_x = x & 0x0F; + + int index = space_filler[rem_y][rem_x]; + const uint8_t *src8 = src; + const uint8_t *source = &src8[source_start + bpp * src_x]; + uint8_t *dest = dst + block_start_s + bpp * (block_x_s + index); + + for (int b = 0; b < bpp; ++b) + dest[b] = source[b]; + } + } +} + +static void +lima_load_tiled_image_bpp4(void *dst, const void *src, + const struct pipe_box *box, + uint32_t dst_stride, + uint32_t src_stride) +{ + for (int y = box->y, dest_y = 0; dest_y < box->height; ++y, ++dest_y) { + int 
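+      /* Tiles are 16x16 texels stored contiguously (256 texels each):
+       * block_y * stride locates the 16-row band, (x >> 4) * 256 selects the
+       * tile within it (in texel units), and space_filler[y & 15][x & 15]
+       * gives the swizzled texel index inside the tile. */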
block_y = y & ~0x0f; + int rem_y = y & 0x0F; + int block_start_s = block_y * src_stride; + int dest_start = dest_y * dst_stride; + + for (int x = box->x, dest_x = 0; dest_x < box->width; ++x, ++dest_x) { + int block_x_s = (x >> 4) * 256; + int rem_x = x & 0x0F; + + int index = space_filler[rem_y][rem_x]; + uint32_t *dest = dst + dest_start + 4 * dest_x; + const uint32_t *source = src + block_start_s + 4 * (block_x_s + index); + + *dest = *source; + } + } +} + +static void +lima_load_tiled_image_generic(void *dst, const void *src, + const struct pipe_box *box, + uint32_t dst_stride, + uint32_t src_stride, + uint32_t bpp) +{ + for (int y = box->y, dest_y = 0; dest_y < box->height; ++y, ++dest_y) { + int block_y = y & ~0x0f; + int rem_y = y & 0x0F; + int block_start_s = block_y * src_stride; + int dest_start = dest_y * dst_stride; + + for (int x = box->x, dest_x = 0; dest_x < box->width; ++x, ++dest_x) { + int block_x_s = (x >> 4) * 256; + int rem_x = x & 0x0F; + + int index = space_filler[rem_y][rem_x]; + uint8_t *dst8 = dst; + uint8_t *dest = &dst8[dest_start + bpp * dest_x]; + const uint8_t *source = src + block_start_s + bpp * (block_x_s + index); + + for (int b = 0; b < bpp; ++b) + dest[b] = source[b]; + } + } +} + +void +lima_store_tiled_image(void *dst, const void *src, + const struct pipe_box *box, + uint32_t dst_stride, + uint32_t src_stride, + uint32_t bpp) +{ + switch (bpp) { + case 4: + lima_store_tiled_image_bpp4(dst, src, box, dst_stride, src_stride); + break; + default: + lima_store_tiled_image_generic(dst, src, box, dst_stride, src_stride, bpp); + } +} + +void +lima_load_tiled_image(void *dst, const void *src, + const struct pipe_box *box, + uint32_t dst_stride, + uint32_t src_stride, + uint32_t bpp) +{ + switch (bpp) { + case 4: + lima_load_tiled_image_bpp4(dst, src, box, dst_stride, src_stride); + break; + default: + lima_load_tiled_image_generic(dst, src, box, dst_stride, src_stride, bpp); + } +} diff --git a/src/gallium/drivers/lima/lima_tiling.h b/src/gallium/drivers/lima/lima_tiling.h new file mode 100644 index 00000000000..ea3a4219d66 --- /dev/null +++ b/src/gallium/drivers/lima/lima_tiling.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2011-2013 Luc Verhaegen + * Copyright (c) 2018 Alyssa Rosenzweig + * Copyright (c) 2018 Vasily Khoruzhick + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef H_LIMA_TILING +#define H_LIMA_TILING + +#include "util/u_box.h" + +void lima_load_tiled_image(void *dst, const void *src, + const struct pipe_box *box, + uint32_t dst_stride, + uint32_t src_stride, + uint32_t bpp); + +void lima_store_tiled_image(void *dst, const void *src, + const struct pipe_box *box, + uint32_t dst_stride, + uint32_t src_stride, + uint32_t bpp); + +#endif diff --git a/src/gallium/drivers/lima/lima_util.c b/src/gallium/drivers/lima/lima_util.c new file mode 100644 index 00000000000..9e1cdf646b9 --- /dev/null +++ b/src/gallium/drivers/lima/lima_util.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2018-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <time.h> +#include <stdio.h> +#include <stdarg.h> + +#include <pipe/p_defines.h> + +#include "lima_util.h" + +FILE *lima_dump_command_stream = NULL; + +bool lima_get_absolute_timeout(uint64_t *timeout) +{ + struct timespec current; + uint64_t current_ns; + + if (*timeout == PIPE_TIMEOUT_INFINITE) + return true; + + if (clock_gettime(CLOCK_MONOTONIC, &current)) + return false; + + current_ns = ((uint64_t)current.tv_sec) * 1000000000ull; + current_ns += current.tv_nsec; + *timeout += current_ns; + + return true; +} + +void lima_dump_blob(FILE *fp, void *data, int size, bool is_float) +{ + for (int i = 0; i * 4 < size; i++) { + if (i % 4 == 0) { + if (i) fprintf(fp, "\n"); + fprintf(fp, "%04x:", i * 4); + } + + if (is_float) + fprintf(fp, " %f", ((float *)data)[i]); + else + fprintf(fp, " 0x%08x", ((uint32_t *)data)[i]); + } + fprintf(fp, "\n"); +} + +void +lima_dump_command_stream_print(void *data, int size, bool is_float, + const char *fmt, ...)
+{ + if (lima_dump_command_stream) { + va_list ap; + va_start(ap, fmt); + vfprintf(lima_dump_command_stream, fmt, ap); + va_end(ap); + + lima_dump_blob(lima_dump_command_stream, data, size, is_float); + } +} diff --git a/src/gallium/drivers/lima/lima_util.h b/src/gallium/drivers/lima/lima_util.h new file mode 100644 index 00000000000..ad9f42158bf --- /dev/null +++ b/src/gallium/drivers/lima/lima_util.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2018-2019 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef H_LIMA_UTIL +#define H_LIMA_UTIL + +#include <stdbool.h> +#include <stdio.h> + +#define LIMA_PAGE_SIZE 4096 + +bool lima_get_absolute_timeout(uint64_t *timeout); +void lima_dump_blob(FILE *fp, void *data, int size, bool is_float); +void lima_dump_command_stream_print(void *data, int size, bool is_float, + const char *fmt, ...); + +#endif diff --git a/src/gallium/drivers/lima/meson.build b/src/gallium/drivers/lima/meson.build new file mode 100644 index 00000000000..a05f0e275ed --- /dev/null +++ b/src/gallium/drivers/lima/meson.build @@ -0,0 +1,89 @@ +# Copyright © 2018 Lima Project +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE.
+ +files_lima = files( + 'ir/gp/gpir.h', + 'ir/gp/nir.c', + 'ir/gp/node.c', + 'ir/gp/lower.c', + 'ir/gp/scheduler.c', + 'ir/gp/instr.c', + 'ir/gp/codegen.h', + 'ir/gp/codegen.c', + 'ir/gp/reduce_scheduler.c', + 'ir/gp/value_regalloc.c', + 'ir/gp/physical_regalloc.c', + 'ir/gp/disasm.c', + + 'ir/pp/ppir.h', + 'ir/pp/nir.c', + 'ir/pp/node.c', + 'ir/pp/lower.c', + 'ir/pp/scheduler.c', + 'ir/pp/instr.c', + 'ir/pp/regalloc.c', + 'ir/pp/codegen.h', + 'ir/pp/codegen.c', + 'ir/pp/node_to_instr.c', + 'ir/pp/disasm.c', + + 'ir/lima_nir_lower_uniform_to_scalar.c', + + 'ir/lima_ir.h', + + 'lima_screen.c', + 'lima_screen.h', + 'lima_context.c', + 'lima_context.h', + 'lima_resource.c', + 'lima_resource.h', + 'lima_state.c', + 'lima_draw.c', + 'lima_program.c', + 'lima_query.c', + 'lima_bo.c', + 'lima_bo.h', + 'lima_submit.c', + 'lima_submit.h', + 'lima_util.c', + 'lima_util.h', + 'lima_texture.c', + 'lima_texture.h', + 'lima_fence.c', + 'lima_fence.h', + 'lima_tiling.c', + 'lima_tiling.h', +) + +liblima = static_library( + 'lima', + files_lima, + include_directories : [ + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers, + ], + dependencies : [dep_libdrm, idep_nir_headers], +) + +driver_lima = declare_dependency( + compile_args : '-DGALLIUM_LIMA', + link_with : [liblima, liblimawinsys], + dependencies : idep_nir, +) diff --git a/src/gallium/meson.build b/src/gallium/meson.build index ad685632a02..a9efb6296b6 100644 --- a/src/gallium/meson.build +++ b/src/gallium/meson.build @@ -143,6 +143,12 @@ if with_gallium_virgl else driver_virgl = declare_dependency() endif +if with_gallium_lima + subdir('winsys/lima/drm') + subdir('drivers/lima') +else + driver_lima = declare_dependency() +endif if with_gallium_opencl # TODO: this isn't really clover specific, but ATM clover is the only # consumer diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build index b3d3cd09c5b..fb8bd97c416 100644 --- a/src/gallium/targets/dri/meson.build +++ b/src/gallium/targets/dri/meson.build @@ -58,7 +58,7 @@ libgallium_dri = shared_library( driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau, driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv, driver_tegra, driver_i915, driver_svga, driver_virgl, - driver_swr, driver_panfrost, driver_iris + driver_swr, driver_panfrost, driver_iris, driver_lima ], # Will be deleted during installation, see install_megadrivers.py install : true, @@ -93,7 +93,8 @@ foreach d : [[with_gallium_kmsro, [ [with_gallium_r300, 'r300_dri.so'], [with_gallium_r600, 'r600_dri.so'], [with_gallium_svga, 'vmwgfx_dri.so'], - [with_gallium_virgl, 'virtio_gpu_dri.so']] + [with_gallium_virgl, 'virtio_gpu_dri.so'], + [with_gallium_lima, 'lima_dri.so']] if d[0] gallium_dri_drivers += d[1] endif diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c index e5baf754487..c702058b0c9 100644 --- a/src/gallium/targets/dri/target.c +++ b/src/gallium/targets/dri/target.c @@ -105,3 +105,6 @@ DEFINE_LOADER_DRM_ENTRYPOINT(st7586) DEFINE_LOADER_DRM_ENTRYPOINT(st7735r) #endif +#if defined(GALLIUM_LIMA) +DEFINE_LOADER_DRM_ENTRYPOINT(lima) +#endif diff --git a/src/gallium/winsys/lima/drm/lima_drm_public.h b/src/gallium/winsys/lima/drm/lima_drm_public.h new file mode 100644 index 00000000000..06ca46499d4 --- /dev/null +++ b/src/gallium/winsys/lima/drm/lima_drm_public.h @@ -0,0 +1,35 @@ +/* + * Copyright © 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this 
software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __LIMA_DRM_PUBLIC_H__ +#define __LIMA_DRM_PUBLIC_H__ + +#include + +struct pipe_screen; +struct renderonly; + +struct pipe_screen *lima_drm_screen_create(int drmFD); +struct pipe_screen *lima_drm_screen_create_renderonly(struct renderonly *ro); + +#endif /* __LIMA_DRM_PUBLIC_H__ */ diff --git a/src/gallium/winsys/lima/drm/lima_drm_winsys.c b/src/gallium/winsys/lima/drm/lima_drm_winsys.c new file mode 100644 index 00000000000..07690307c64 --- /dev/null +++ b/src/gallium/winsys/lima/drm/lima_drm_winsys.c @@ -0,0 +1,124 @@ +/* + * Copyright © 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <fcntl.h> +#include <unistd.h> +#include <sys/stat.h> + +#include "c11/threads.h" +#include "util/u_hash_table.h" +#include "util/u_pointer.h" +#include "renderonly/renderonly.h" + +#include "lima_drm_public.h" + +#include "lima/lima_screen.h" + +static struct util_hash_table *fd_tab = NULL; +static mtx_t lima_screen_mutex = _MTX_INITIALIZER_NP; + +static void +lima_drm_screen_destroy(struct pipe_screen *pscreen) +{ + struct lima_screen *screen = lima_screen(pscreen); + boolean destroy; + int fd = screen->fd; + + mtx_lock(&lima_screen_mutex); + destroy = --screen->refcnt == 0; + if (destroy) + util_hash_table_remove(fd_tab, intptr_to_pointer(fd)); + mtx_unlock(&lima_screen_mutex); + + if (destroy) { + pscreen->destroy = screen->winsys_priv; + pscreen->destroy(pscreen); + close(fd); + } +} + +static unsigned hash_fd(void *key) +{ + int fd = pointer_to_intptr(key); + struct stat stat; + + fstat(fd, &stat); + + return stat.st_dev ^ stat.st_ino ^ stat.st_rdev; +} + +static int compare_fd(void *key1, void *key2) +{ + int fd1 = pointer_to_intptr(key1); + int fd2 = pointer_to_intptr(key2); + struct stat stat1, stat2; + + fstat(fd1, &stat1); + fstat(fd2, &stat2); + + return stat1.st_dev != stat2.st_dev || + stat1.st_ino != stat2.st_ino || + stat1.st_rdev != stat2.st_rdev; +} + +struct pipe_screen * +lima_drm_screen_create(int fd) +{ + struct pipe_screen *pscreen = NULL; + + mtx_lock(&lima_screen_mutex); + if (!fd_tab) { + fd_tab = util_hash_table_create(hash_fd, compare_fd); + if (!fd_tab) + goto unlock; + } + + pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd)); + if (pscreen) { + lima_screen(pscreen)->refcnt++; + } else { + int dup_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + + pscreen = lima_screen_create(dup_fd, NULL); + if (pscreen) { + util_hash_table_set(fd_tab, intptr_to_pointer(dup_fd), pscreen); + + /* Bit of a hack, to avoid circular linkage dependency, + * ie. pipe driver having to call in to winsys, we + * override the pipe drivers screen->destroy(): + */ + lima_screen(pscreen)->winsys_priv = pscreen->destroy; + pscreen->destroy = lima_drm_screen_destroy; + } + } + +unlock: + mtx_unlock(&lima_screen_mutex); + return pscreen; +} + +struct pipe_screen * +lima_drm_screen_create_renderonly(struct renderonly *ro) +{ + return lima_screen_create(fcntl(ro->gpu_fd, F_DUPFD_CLOEXEC, 3), ro); +} diff --git a/src/gallium/winsys/lima/drm/meson.build b/src/gallium/winsys/lima/drm/meson.build new file mode 100644 index 00000000000..2939571e590 --- /dev/null +++ b/src/gallium/winsys/lima/drm/meson.build @@ -0,0 +1,29 @@ +# Copyright © 2018 Lima Project +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +liblimawinsys = static_library( + 'limawinsys', + files('lima_drm_winsys.c'), + include_directories : [ + inc_src, inc_include, + inc_gallium, inc_gallium_aux, inc_gallium_drivers, + ], +) -- 2.30.2
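
For reference, the space_filler[16][16] table in lima_tiling.c above is consistent with the Mali Utgard 16x16 tile swizzle: within a tile, the texel at (x, y) lands at index interleave(x ^ y) | (interleave(y) << 1), where interleave() spreads the low four bits of its argument into the even bit positions. The standalone sketch below is not part of the patch (interleave4() is an illustrative helper name); it regenerates the table from that formula so the hard-coded array can be cross-checked:

#include <stdint.h>
#include <stdio.h>

/* Illustrative helper: spread the low 4 bits of v into bit positions
 * 0, 2, 4 and 6. */
static uint32_t interleave4(uint32_t v)
{
   uint32_t r = 0;
   for (int i = 0; i < 4; i++)
      r |= ((v >> i) & 1) << (2 * i);
   return r;
}

int main(void)
{
   /* Each printed row should match the corresponding row of
    * space_filler[][] in lima_tiling.c. */
   for (uint32_t y = 0; y < 16; y++) {
      for (uint32_t x = 0; x < 16; x++)
         printf("%4u,", interleave4(x ^ y) | (interleave4(y) << 1));
      printf("\n");
   }
   return 0;
}

This is also why the store/load helpers index space_filler with the low four bits of x and y and add (x >> 4) * 256: each 16x16 tile holds 256 texels, stored contiguously before the next tile to the right.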
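
Similarly, lima_drm_winsys.c keys its screen hash table on the DRM device behind a file descriptor rather than on the descriptor value itself, so independently opened fds for the same render node share one lima_screen; hash_fd() and compare_fd() reduce this to the st_dev/st_ino/st_rdev triple returned by fstat(). A minimal standalone sketch of that equality test, not part of the patch (same_drm_device() and the /dev/dri/renderD128 path are assumptions for illustration):

#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

/* Hypothetical helper mirroring the winsys compare_fd() logic: two fds
 * are considered equal when they refer to the same underlying device. */
static bool same_drm_device(int fd1, int fd2)
{
   struct stat s1, s2;

   if (fstat(fd1, &s1) || fstat(fd2, &s2))
      return false;

   return s1.st_dev == s2.st_dev &&
          s1.st_ino == s2.st_ino &&
          s1.st_rdev == s2.st_rdev;
}

int main(void)
{
   /* Assumed device node; adjust for the system under test. */
   int a = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
   int b = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);

   if (a >= 0 && b >= 0)
      printf("same device: %s\n", same_drm_device(a, b) ? "yes" : "no");

   if (a >= 0) close(a);
   if (b >= 0) close(b);
   return 0;
}

On a cache hit lima_drm_screen_create() just bumps the screen's refcnt; on a miss it dups the fd with F_DUPFD_CLOEXEC so the screen owns a descriptor independent of the caller's, and overrides pscreen->destroy so the hash-table entry is dropped when the last reference goes away.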