src/pipeline/pipeline.cpp

   1 /*
   2  * Copyright 2017 Jacob Lifshay
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in all
  12  * copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  *
  22  */
  #include "pipeline.h"
  #include "spirv_to_llvm/spirv_to_llvm.h"
  #include "llvm_wrapper/llvm_wrapper.h"
  #include "llvm_wrapper/orc_compile_stack.h"
  #include "vulkan/util.h"
  #include "util/soft_float.h"
  #include "json/json.h"
  #include <cassert>
  #include <cmath>
  #include <cstdint>
  #include <cstring>
  #include <iostream>
  #include <stdexcept>
  #include <vector>
  34
  35 namespace kazan
  36 {
  37 namespace pipeline
  38 {
  39 llvm_wrapper::Module Pipeline::optimize_module(llvm_wrapper::Module module,
  40                                                ::LLVMTargetMachineRef target_machine)
  41 {
  42     switch(llvm_wrapper::Target_machine::get_code_gen_opt_level(target_machine))
  43     {
  44     case ::LLVMCodeGenLevelNone:
  45     case ::LLVMCodeGenLevelLess:
  46         break;
  47     case ::LLVMCodeGenLevelDefault:
  48     case ::LLVMCodeGenLevelAggressive:
  49     {
  50 #warning finish implementing module optimizations
  51         {
  52             auto manager = llvm_wrapper::Pass_manager::create_function_pass_manager(module.get());
  53             ::LLVMAddAnalysisPasses(target_machine, manager.get());
  54             ::LLVMAddPromoteMemoryToRegisterPass(manager.get());
  55             ::LLVMAddScalarReplAggregatesPass(manager.get());
  56             ::LLVMAddScalarizerPass(manager.get());
  57             ::LLVMAddEarlyCSEMemSSAPass(manager.get());
  58             ::LLVMAddSCCPPass(manager.get());
  59             ::LLVMAddAggressiveDCEPass(manager.get());
  60             ::LLVMAddLICMPass(manager.get());
  61             ::LLVMAddIndVarSimplifyPass(manager.get());
  62             ::LLVMAddCFGSimplificationPass(manager.get());
  63             ::LLVMAddReassociatePass(manager.get());
  64             ::LLVMAddInstructionCombiningPass(manager.get());
  65             ::LLVMAddNewGVNPass(manager.get());
  66             ::LLVMAddCorrelatedValuePropagationPass(manager.get());
  67             ::LLVMInitializeFunctionPassManager(manager.get());
  68             for(auto fn = ::LLVMGetFirstFunction(module.get()); fn; fn = ::LLVMGetNextFunction(fn))
  69                 ::LLVMRunFunctionPassManager(manager.get(), fn);
  70             ::LLVMFinalizeFunctionPassManager(manager.get());
  71         }
  72         {
  73             auto manager = llvm_wrapper::Pass_manager::create_module_pass_manager();
  74             ::LLVMAddAnalysisPasses(target_machine, manager.get());
  75             ::LLVMAddIPSCCPPass(manager.get());
  76             ::LLVMAddFunctionInliningPass(manager.get());
  77             ::LLVMAddDeadArgEliminationPass(manager.get());
  78             ::LLVMAddGlobalDCEPass(manager.get());
  79             ::LLVMRunPassManager(manager.get(), module.get());
  80         }
  81         {
  82             auto manager = llvm_wrapper::Pass_manager::create_function_pass_manager(module.get());
  83             ::LLVMAddAnalysisPasses(target_machine, manager.get());
  84             ::LLVMAddCFGSimplificationPass(manager.get());
  85             ::LLVMAddPromoteMemoryToRegisterPass(manager.get());
  86             ::LLVMAddScalarReplAggregatesPass(manager.get());
  87             ::LLVMAddLICMPass(manager.get());
  88             ::LLVMAddIndVarSimplifyPass(manager.get());
  89             ::LLVMAddReassociatePass(manager.get());
  90             ::LLVMAddInstructionCombiningPass(manager.get());
  91             ::LLVMAddLoopUnrollPass(manager.get());
  92             ::LLVMAddSLPVectorizePass(manager.get());
  93             ::LLVMAddAggressiveDCEPass(manager.get());
  94             ::LLVMInitializeFunctionPassManager(manager.get());
  95             for(auto fn = ::LLVMGetFirstFunction(module.get()); fn; fn = ::LLVMGetNextFunction(fn))
  96                 ::LLVMRunFunctionPassManager(manager.get(), fn);
  97             ::LLVMFinalizeFunctionPassManager(manager.get());
  98         }
  99         std::cerr << "optimized module:" << std::endl;
 100         ::LLVMDumpModule(module.get());
 101         break;
 102     }
 103     }
 104     return module;
 105 }
 106
 107 struct Graphics_pipeline::Implementation
 108 {
 109     llvm_wrapper::Context llvm_context = llvm_wrapper::Context::create();
 110     spirv_to_llvm::Jit_symbol_resolver jit_symbol_resolver;
 111     llvm_wrapper::Orc_compile_stack jit_stack;
 112     llvm_wrapper::Target_data data_layout;
 113     std::vector<spirv_to_llvm::Converted_module> compiled_shaders;
 114     std::shared_ptr<spirv_to_llvm::Struct_type_descriptor> vertex_shader_output_struct;
 115     std::string append_value_to_string(std::string str,
 116                                        spirv_to_llvm::Type_descriptor &type,
 117                                        const void *value) const
 118     {
 119         struct Visitor : public spirv_to_llvm::Type_descriptor::Type_visitor
 120         {
 121             const Implementation *this_;
 122             std::string &str;
 123             const void *value;
 124             Visitor(const Implementation *this_, std::string &str, const void *value) noexcept
 125                 : this_(this_),
 126                   str(str),
 127                   value(value)
 128             {
 129             }
 130             virtual void visit(spirv_to_llvm::Simple_type_descriptor &type) override
 131             {
 132                 auto llvm_type = type.get_or_make_type().type;
 133                 switch(::LLVMGetTypeKind(llvm_type))
 134                 {
 135                 case ::LLVMVoidTypeKind:
 136                 case ::LLVMX86_FP80TypeKind:
 137                 case ::LLVMFP128TypeKind:
 138                 case ::LLVMPPC_FP128TypeKind:
 139                 case ::LLVMLabelTypeKind:
 140                 case ::LLVMFunctionTypeKind:
 141                 case ::LLVMStructTypeKind:
 142                 case ::LLVMArrayTypeKind:
 143                 case ::LLVMPointerTypeKind:
 144                 case ::LLVMVectorTypeKind:
 145                 case ::LLVMMetadataTypeKind:
 146                 case ::LLVMX86_MMXTypeKind:
 147                 case ::LLVMTokenTypeKind:
 148                     break;
 149                 case ::LLVMHalfTypeKind:
 150                 {
 151                     auto integer_value = *static_cast<const std::uint16_t *>(value);
 152                     auto float_value =
 153                         util::soft_float::ExtendedFloat::fromHalfPrecision(integer_value);
 154                     str = json::ast::Number_value::append_double_to_string(
 155                         static_cast<double>(float_value), std::move(str));
 156                     if(float_value.isNaN())
 157                     {
 158                         str += " (0x";
 159                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 160                             integer_value, std::move(str), 0x10);
 161                         str += ")";
 162                     }
 163                     return;
 164                 }
 165                 case ::LLVMFloatTypeKind:
 166                 {
 167                     static_assert(sizeof(std::uint32_t) == sizeof(float)
 168                                       && alignof(std::uint32_t) == alignof(float),
 169                                   "");
 170                     union
 171                     {
 172                         std::uint32_t integer_value;
 173                         float float_value;
 174                     };
 175                     integer_value = *static_cast<const std::uint32_t *>(value);
 176                     str = json::ast::Number_value::append_double_to_string(float_value,
 177                                                                            std::move(str));
 178                     if(std::isnan(float_value))
 179                     {
 180                         str += " (0x";
 181                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 182                             integer_value, std::move(str), 0x10);
 183                         str += ")";
 184                     }
 185                     return;
 186                 }
 187                 case ::LLVMDoubleTypeKind:
 188                 {
 189                     static_assert(sizeof(std::uint64_t) == sizeof(double)
 190                                       && alignof(std::uint64_t) == alignof(double),
 191                                   "");
 192                     union
 193                     {
 194                         std::uint64_t integer_value;
 195                         double float_value;
 196                     };
 197                     integer_value = *static_cast<const std::uint64_t *>(value);
 198                     str = json::ast::Number_value::append_double_to_string(float_value,
 199                                                                            std::move(str));
 200                     if(std::isnan(float_value))
 201                     {
 202                         str += " (0x";
 203                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 204                             integer_value, std::move(str), 0x10);
 205                         str += ")";
 206                     }
 207                     return;
 208                 }
 209                 case ::LLVMIntegerTypeKind:
 210                 {
 211                     switch(::LLVMGetIntTypeWidth(llvm_type))
 212                     {
 213                     case 8:
 214                     {
 215                         auto integer_value = *static_cast<const std::uint8_t *>(value);
 216                         str += "0x";
 217                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 218                             integer_value, std::move(str), 0x10);
 219                         str += " ";
 220                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 221                             integer_value, std::move(str));
 222                         str += " ";
 223                         str = json::ast::Number_value::append_signed_integer_to_string(
 224                             static_cast<std::int8_t>(integer_value), std::move(str));
 225                         return;
 226                     }
 227                     case 16:
 228                     {
 229                         auto integer_value = *static_cast<const std::uint16_t *>(value);
 230                         str += "0x";
 231                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 232                             integer_value, std::move(str), 0x10);
 233                         str += " ";
 234                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 235                             integer_value, std::move(str));
 236                         str += " ";
 237                         str = json::ast::Number_value::append_signed_integer_to_string(
 238                             static_cast<std::int16_t>(integer_value), std::move(str));
 239                         return;
 240                     }
 241                     case 32:
 242                     {
 243                         auto integer_value = *static_cast<const std::uint32_t *>(value);
 244                         str += "0x";
 245                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 246                             integer_value, std::move(str), 0x10);
 247                         str += " ";
 248                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 249                             integer_value, std::move(str));
 250                         str += " ";
 251                         str = json::ast::Number_value::append_signed_integer_to_string(
 252                             static_cast<std::int32_t>(integer_value), std::move(str));
 253                         return;
 254                     }
 255                     case 64:
 256                     {
 257                         auto integer_value = *static_cast<const std::uint64_t *>(value);
 258                         str += "0x";
 259                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 260                             integer_value, std::move(str), 0x10);
 261                         str += " ";
 262                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 263                             integer_value, std::move(str));
 264                         str += " ";
 265                         str = json::ast::Number_value::append_signed_integer_to_string(
 266                             static_cast<std::int64_t>(integer_value), std::move(str));
 267                         return;
 268                     }
 269                     }
 270                     break;
 271                 }
 272                 }
 273                 assert(!"unhandled type");
 274                 throw std::runtime_error("unhandled type");
 275             }
 276             virtual void visit(spirv_to_llvm::Vector_type_descriptor &type) override
 277             {
 278                 auto llvm_element_type = type.get_element_type()->get_or_make_type().type;
 279                 std::size_t element_size =
 280                     ::LLVMABISizeOfType(this_->data_layout.get(), llvm_element_type);
 281                 std::size_t element_count = type.get_element_count();
 282                 str += "<";
 283                 auto separator = "";
 284                 for(std::size_t i = 0; i < element_count; i++)
 285                 {
 286                     str += separator;
 287                     separator = ", ";
 288                     str = this_->append_value_to_string(
 289                         std::move(str),
 290                         *type.get_element_type(),
 291                         static_cast<const char *>(value) + i * element_size);
 292                 }
 293                 str += ">";
 294             }
 295             virtual void visit(spirv_to_llvm::Matrix_type_descriptor &type) override
 296             {
 297                 assert(!"dumping matrix not implemented");
 298                 throw std::runtime_error("dumping matrix not implemented");
 299 #warning dumping matrix not implemented
 300             }
 301             virtual void visit(spirv_to_llvm::Array_type_descriptor &type) override
 302             {
 303                 auto llvm_element_type = type.get_element_type()->get_or_make_type().type;
 304                 std::size_t element_size =
 305                     ::LLVMABISizeOfType(this_->data_layout.get(), llvm_element_type);
 306                 std::size_t element_count = type.get_element_count();
 307                 str += "[";
 308                 auto separator = "";
 309                 for(std::size_t i = 0; i < element_count; i++)
 310                 {
 311                     str += separator;
 312                     separator = ", ";
 313                     str = this_->append_value_to_string(
 314                         std::move(str),
 315                         *type.get_element_type(),
 316                         static_cast<const char *>(value) + i * element_size);
 317                 }
 318                 str += "]";
 319             }
 320             virtual void visit(spirv_to_llvm::Pointer_type_descriptor &type) override
 321             {
 322                 str += "pointer:0x";
 323                 str = json::ast::Number_value::append_unsigned_integer_to_string(
 324                     reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value)),
 325                     std::move(str),
 326                     0x10);
 327             }
 328             virtual void visit(spirv_to_llvm::Function_type_descriptor &type) override
 329             {
 330                 str += "function:0x";
 331                 str = json::ast::Number_value::append_unsigned_integer_to_string(
 332                     reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value)),
 333                     std::move(str),
 334                     0x10);
 335             }
 336             virtual void visit(spirv_to_llvm::Struct_type_descriptor &type) override
 337             {
 338                 auto &&members = type.get_members(true);
 339                 auto llvm_type = type.get_or_make_type().type;
 340                 str += "{";
 341                 auto separator = "";
 342                 for(auto &member : members)
 343                 {
 344                     str += separator;
 345                     separator = ", ";
 346                     str = this_->append_value_to_string(
 347                         std::move(str),
 348                         *member.type,
 349                         static_cast<const char *>(value)
 350                             + ::LLVMOffsetOfElement(
 351                                   this_->data_layout.get(), llvm_type, member.llvm_member_index));
 352                 }
 353                 str += "}";
 354             }
 355         };
 356         type.visit(Visitor(this, str, value));
 357         return str;
 358     }
 359 };
 360
 361 void Graphics_pipeline::dump_vertex_shader_output_struct(const void *output_struct) const
 362 {
 363     std::cerr << "output: "
 364               << implementation->append_value_to_string(
 365                      {}, *implementation->vertex_shader_output_struct, output_struct)
 366               << std::endl;
 367 }
 368
 369 void Graphics_pipeline::run(std::uint32_t vertex_start_index,
 370                             std::uint32_t vertex_end_index,
 371                             std::uint32_t instance_id,
 372                             const vulkan::Vulkan_image &color_attachment,
 373                             void *const *bindings)
 374 {
 375     typedef std::uint32_t Pixel_type;
 376     assert(color_attachment.descriptor.tiling == VK_IMAGE_TILING_LINEAR);
 377     auto color_attachment_memory_properties = color_attachment.descriptor.get_memory_properties();
 378     auto color_attachment_memory_properties_color_component =
 379         color_attachment_memory_properties.get_color_component();
 380     std::size_t color_attachment_stride = color_attachment_memory_properties_color_component.stride;
 381     std::size_t color_attachment_pixel_size =
 382         color_attachment_memory_properties_color_component.pixel_size;
 383     assert(color_attachment_memory_properties_color_component.offset_from_array_layer_start == 0);
 384     void *color_attachment_memory = color_attachment.memory.get();
 385     float viewport_x_scale, viewport_x_offset, viewport_y_scale, viewport_y_offset,
 386         viewport_z_scale, viewport_z_offset;
 387     {
 388         float px = viewport.width;
 389         float ox = viewport.x + 0.5f * viewport.width;
 390         float py = viewport.height;
 391         float oy = viewport.y + 0.5f * viewport.height;
 392         float pz = viewport.maxDepth - viewport.minDepth;
 393         float oz = viewport.minDepth;
 394         viewport_x_scale = px * 0.5f;
 395         viewport_x_offset = ox;
 396         viewport_y_scale = py * 0.5f;
 397         viewport_y_offset = oy;
 398         viewport_z_scale = pz;
 399         viewport_z_offset = oz;
 400     }
 401     constexpr std::size_t vec4_native_alignment = alignof(float) * 4;
 402     constexpr std::size_t max_alignment = alignof(std::max_align_t);
 403     constexpr std::size_t vec4_alignment =
 404         vec4_native_alignment > max_alignment ? max_alignment : vec4_native_alignment;
 405     constexpr std::size_t ivec4_native_alignment = alignof(std::int32_t) * 4;
 406     constexpr std::size_t ivec4_alignment =
 407         ivec4_native_alignment > max_alignment ? max_alignment : ivec4_native_alignment;
 408     struct alignas(vec4_alignment) Vec4
 409     {
 410         float x;
 411         float y;
 412         float z;
 413         float w;
 414         constexpr Vec4() noexcept : x(), y(), z(), w()
 415         {
 416         }
 417         constexpr explicit Vec4(float x, float y, float z, float w) noexcept : x(x),
 418                                                                                y(y),
 419                                                                                z(z),
 420                                                                                w(w)
 421         {
 422         }
 423     };
 424     struct alignas(ivec4_alignment) Ivec4
 425     {
 426         std::int32_t x;
 427         std::int32_t y;
 428         std::int32_t z;
 429         std::int32_t w;
 430         constexpr Ivec4() noexcept : x(), y(), z(), w()
 431         {
 432         }
 433         constexpr explicit Ivec4(std::int32_t x,
 434                                  std::int32_t y,
 435                                  std::int32_t z,
 436                                  std::int32_t w) noexcept : x(x),
 437                                                             y(y),
 438                                                             z(z),
 439                                                             w(w)
 440         {
 441         }
 442     };
 443     auto interpolate_float = [](float t, float v0, float v1) noexcept->float
 444     {
 445         return t * v1 + (1.0f - t) * v0;
 446     };
 447     auto interpolate_vec4 = [interpolate_float](
 448                                 float t, const Vec4 &v0, const Vec4 &v1) noexcept->Vec4
 449     {
 450         return Vec4(interpolate_float(t, v0.x, v1.x),
 451                     interpolate_float(t, v0.y, v1.y),
 452                     interpolate_float(t, v0.z, v1.z),
 453                     interpolate_float(t, v0.w, v1.w));
 454     };
 455     static constexpr std::size_t triangle_vertex_count = 3;
 456     struct Triangle
 457     {
 458         Vec4 vertexes[triangle_vertex_count];
 459         constexpr Triangle() noexcept : vertexes{}
 460         {
 461         }
 462         constexpr Triangle(const Vec4 &v0, const Vec4 &v1, const Vec4 &v2) noexcept
 463             : vertexes{v0, v1, v2}
 464         {
 465         }
 466     };
 467     auto solve_for_t = [](float v0, float v1) noexcept->float
 468     {
 469         // solves interpolate_float(t, v0, v1) == 0
 470         return v0 / (v0 - v1);
 471     };
 472     auto clip_edge = [solve_for_t, interpolate_vec4](const Vec4 &start_vertex,
 473                                                      const Vec4 &end_vertex,
 474                                                      Vec4 *output_vertexes,
 475                                                      std::size_t &output_vertex_count,
 476                                                      auto eval_vertex) -> bool
 477     {
 478         // eval_vertex returns a non-negative number if the vertex is inside the clip volume
 479         float start_vertex_signed_distance = eval_vertex(start_vertex);
 480         float end_vertex_signed_distance = eval_vertex(end_vertex);
 481         if(start_vertex_signed_distance != start_vertex_signed_distance)
 482             return false; // triangle has a NaN coordinate; skip it
 483         if(start_vertex_signed_distance < 0)
 484         {
 485             // start_vertex is outside
 486             if(end_vertex_signed_distance < 0)
 487             {
 488                 // end_vertex is outside; do nothing
 489             }
 490             else
 491             {
 492                 // end_vertex is inside
 493                 output_vertexes[output_vertex_count++] = interpolate_vec4(
 494                     solve_for_t(start_vertex_signed_distance, end_vertex_signed_distance),
 495                     start_vertex,
 496                     end_vertex);
 497                 output_vertexes[output_vertex_count++] = end_vertex;
 498             }
 499         }
 500         else
 501         {
 502             // start_vertex is inside
 503             if(end_vertex_signed_distance < 0)
 504             {
 505                 // end_vertex is outside
 506                 output_vertexes[output_vertex_count++] = interpolate_vec4(
 507                     solve_for_t(start_vertex_signed_distance, end_vertex_signed_distance),
 508                     start_vertex,
 509                     end_vertex);
 510             }
 511             else
 512             {
 513                 // end_vertex is inside
 514                 output_vertexes[output_vertex_count++] = end_vertex;
 515             }
 516         }
 517         return true;
 518     };
 519     auto clip_triangles = [clip_edge](
 520         std::vector<Triangle> &triangles, std::vector<Triangle> &temp_triangles, auto eval_vertex)
 521     {
 522         temp_triangles.clear();
 523         for(auto &input_ref : triangles)
 524         {
 525             Triangle input = input_ref; // copy to enable compiler optimizations
 526             constexpr std::size_t max_clipped_output_vertex_count = 4;
 527             Vec4 output_vertexes[max_clipped_output_vertex_count];
 528             std::size_t output_vertex_count = 0;
 529             bool skip_triangle = false;
 530             std::size_t end_vertex_index = 1;
 531             for(std::size_t start_vertex_index = 0; start_vertex_index < triangle_vertex_count;
 532                 start_vertex_index++)
 533             {
 534                 if(!clip_edge(input.vertexes[start_vertex_index],
 535                               input.vertexes[end_vertex_index],
 536                               output_vertexes,
 537                               output_vertex_count,
 538                               eval_vertex))
 539                 {
 540                     skip_triangle = true;
 541                     break;
 542                 }
 543                 if(++end_vertex_index >= triangle_vertex_count)
 544                     end_vertex_index = 0;
 545             }
 546             if(skip_triangle)
 547                 continue;
 548             switch(output_vertex_count)
 549             {
 550             case 0:
 551             case 1:
 552             case 2:
 553                 continue;
 554             case 3:
 555                 temp_triangles.push_back(
 556                     Triangle(output_vertexes[0], output_vertexes[1], output_vertexes[2]));
 557                 continue;
 558             case 4:
 559                 temp_triangles.push_back(
 560                     Triangle(output_vertexes[0], output_vertexes[1], output_vertexes[2]));
 561                 temp_triangles.push_back(
 562                     Triangle(output_vertexes[0], output_vertexes[2], output_vertexes[3]));
 563                 continue;
 564             }
 565             assert(!"clipping algorithm failed");
 566         }
 567         temp_triangles.swap(triangles);
 568     };
 569     std::vector<Triangle> triangles;
 570     std::vector<Triangle> temp_triangles;
 571     constexpr std::size_t chunk_max_size = 96;
 572     static_assert(chunk_max_size % triangle_vertex_count == 0, "");
 573     std::unique_ptr<unsigned char[]> chunk_vertex_buffer(
 574         new unsigned char[get_vertex_shader_output_struct_size() * chunk_max_size]);
 575     while(vertex_start_index < vertex_end_index)
 576     {
 577         std::uint32_t chunk_size = vertex_end_index - vertex_start_index;
 578         if(chunk_size > chunk_max_size)
 579             chunk_size = chunk_max_size;
 580         auto current_vertex_start_index = vertex_start_index;
 581         vertex_start_index += chunk_size;
 582         run_vertex_shader(current_vertex_start_index,
 583                           current_vertex_start_index + chunk_size,
 584                           instance_id,
 585                           chunk_vertex_buffer.get(),
 586                           bindings);
 587         const unsigned char *current_vertex =
 588             chunk_vertex_buffer.get() + vertex_shader_position_output_offset;
 589         triangles.clear();
 590         for(std::uint32_t i = 0; i + triangle_vertex_count <= chunk_size;
 591             i += triangle_vertex_count)
 592         {
 593             Triangle triangle;
 594             for(std::size_t j = 0; j < triangle_vertex_count; j++)
 595             {
 596                 triangle.vertexes[j] = *reinterpret_cast<const Vec4 *>(current_vertex);
 597                 current_vertex += vertex_shader_output_struct_size;
 598             }
 599             triangles.push_back(triangle);
 600         }
 601         // clip to 0 <= vertex.z
 602         clip_triangles(triangles,
 603                        temp_triangles,
 604                        [](const Vec4 &vertex) noexcept->float
 605                        {
 606                            return vertex.z;
 607                        });
 608         // clip to vertex.z <= vertex.w
 609         clip_triangles(triangles,
 610                        temp_triangles,
 611                        [](const Vec4 &vertex) noexcept->float
 612                        {
 613                            return vertex.w - vertex.z;
 614                        });
 615         // clip to -vertex.w <= vertex.x
 616         clip_triangles(triangles,
 617                        temp_triangles,
 618                        [](const Vec4 &vertex) noexcept->float
 619                        {
 620                            return vertex.x + vertex.w;
 621                        });
 622         // clip to vertex.x <= vertex.w
 623         clip_triangles(triangles,
 624                        temp_triangles,
 625                        [](const Vec4 &vertex) noexcept->float
 626                        {
 627                            return vertex.w - vertex.x;
 628                        });
 629         // clip to -vertex.w <= vertex.y
 630         clip_triangles(triangles,
 631                        temp_triangles,
 632                        [](const Vec4 &vertex) noexcept->float
 633                        {
 634                            return vertex.y + vertex.w;
 635                        });
 636         // clip to vertex.y <= vertex.w
 637         clip_triangles(triangles,
 638                        temp_triangles,
 639                        [](const Vec4 &vertex) noexcept->float
 640                        {
 641                            return vertex.w - vertex.y;
 642                        });
 643         VkOffset2D clipped_scissor_rect_min = scissor_rect.offset;
 644         VkOffset2D clipped_scissor_rect_end = {
 645             .x = scissor_rect.offset.x + static_cast<std::int32_t>(scissor_rect.extent.width),
 646             .y = scissor_rect.offset.y + static_cast<std::int32_t>(scissor_rect.extent.height),
 647         };
 648         if(clipped_scissor_rect_min.x < 0)
 649             clipped_scissor_rect_min.x = 0;
 650         if(clipped_scissor_rect_min.y < 0)
 651             clipped_scissor_rect_min.y = 0;
 652         if(clipped_scissor_rect_end.x > color_attachment.descriptor.extent.width)
 653             clipped_scissor_rect_end.x = color_attachment.descriptor.extent.width;
 654         if(clipped_scissor_rect_end.y < color_attachment.descriptor.extent.height)
 655             clipped_scissor_rect_end.y = color_attachment.descriptor.extent.height;
 656         if(clipped_scissor_rect_end.x <= clipped_scissor_rect_min.x)
 657             continue;
 658         if(clipped_scissor_rect_end.y <= clipped_scissor_rect_min.y)
 659             continue;
 660         for(std::size_t triangle_index = 0; triangle_index < triangles.size(); triangle_index++)
 661         {
 662             Triangle triangle = triangles[triangle_index];
 663             Vec4 projected_triangle_and_inv_w[triangle_vertex_count];
 664             Vec4 framebuffer_coordinates[triangle_vertex_count];
 665             for(std::size_t i = 0; i < triangle_vertex_count; i++)
 666             {
 667                 projected_triangle_and_inv_w[i].w = 1.0f / triangle.vertexes[i].w;
 668                 projected_triangle_and_inv_w[i].x =
 669                     triangle.vertexes[i].x * projected_triangle_and_inv_w[i].w;
 670                 projected_triangle_and_inv_w[i].y =
 671                     triangle.vertexes[i].y * projected_triangle_and_inv_w[i].w;
 672                 projected_triangle_and_inv_w[i].z =
 673                     triangle.vertexes[i].z * projected_triangle_and_inv_w[i].w;
 674                 framebuffer_coordinates[i] =
 675                     Vec4(projected_triangle_and_inv_w[i].x * viewport_x_scale + viewport_x_offset,
 676                          projected_triangle_and_inv_w[i].y * viewport_y_scale + viewport_y_offset,
 677                          projected_triangle_and_inv_w[i].z * viewport_z_scale + viewport_z_offset,
 678                          0);
 679             }
 680             float orientation = 0;
 681             for(std::size_t start_vertex_index = 0, end_vertex_index = 1;
 682                 start_vertex_index < triangle_vertex_count;
 683                 start_vertex_index++)
 684             {
 685                 float x1 = framebuffer_coordinates[start_vertex_index].x;
 686                 float y1 = framebuffer_coordinates[start_vertex_index].y;
 687                 float x2 = framebuffer_coordinates[end_vertex_index].x;
 688                 float y2 = framebuffer_coordinates[end_vertex_index].y;
 689                 orientation += x2 * y1 - x1 * y2;
 690                 if(++end_vertex_index >= triangle_vertex_count)
 691                     end_vertex_index = 0;
 692             }
 693             if(!(orientation < 0)
 694                && !(orientation > 0)) // zero area triangle or triangle coordinate is NaN
 695                 continue;
 696             // orientation > 0 for counter-clockwise triangle
 697             // orientation < 0 for clockwise triangle
 698             std::int32_t min_x, end_x, min_y, end_y;
 699             bool first = true;
 700             for(std::size_t i = 0; i < triangle_vertex_count; i++)
 701             {
 702                 // x and y will be >= 0 so we can use truncate instead of floor for speed
 703                 auto current_min_x = static_cast<std::int32_t>(framebuffer_coordinates[i].x);
 704                 auto current_min_y = static_cast<std::int32_t>(framebuffer_coordinates[i].y);
 705                 std::int32_t current_end_x = current_min_x + 1;
 706                 std::int32_t current_end_y = current_min_y + 1;
 707                 if(first || current_min_x < min_x)
 708                     min_x = current_min_x;
 709                 if(first || current_end_x > end_x)
 710                     end_x = current_end_x;
 711                 if(first || current_min_y < min_y)
 712                     min_y = current_min_y;
 713                 if(first || current_end_y > end_y)
 714                     end_y = current_end_y;
 715                 first = false;
 716             }
 717             if(min_x < clipped_scissor_rect_min.x)
 718                 min_x = clipped_scissor_rect_min.x;
 719             if(end_x > clipped_scissor_rect_end.x)
 720                 end_x = clipped_scissor_rect_end.x;
 721             if(min_y < clipped_scissor_rect_min.y)
 722                 min_y = clipped_scissor_rect_min.y;
 723             if(end_y > clipped_scissor_rect_end.y)
 724                 end_y = clipped_scissor_rect_end.y;
 725             constexpr int log2_scale = 16;
 726             constexpr auto scale = 1LL << log2_scale;
 727             typedef std::int64_t Edge_equation_integer_type;
 728             struct Edge_equation
 729             {
 730                 Edge_equation_integer_type a;
 731                 Edge_equation_integer_type b;
 732                 Edge_equation_integer_type c;
 733                 Edge_equation_integer_type padding;
 734                 constexpr Edge_equation() noexcept : a(), b(), c(), padding()
 735                 {
 736                 }
 737                 constexpr Edge_equation(Edge_equation_integer_type a,
 738                                         Edge_equation_integer_type b,
 739                                         Edge_equation_integer_type c) noexcept : a(a),
 740                                                                                  b(b),
 741                                                                                  c(c),
 742                                                                                  padding()
 743                 {
 744                 }
 745                 constexpr bool inside(std::int32_t x, std::int32_t y) const noexcept
 746                 {
 747                     return a * x + b * y + c >= 0;
 748                 }
 749             };
 750             Edge_equation edge_equations[triangle_vertex_count];
 751             bool skip_triangle = false;
 752             for(std::size_t start_vertex_index = 0, end_vertex_index = 1, other_vertex_index = 2;
 753                 start_vertex_index < triangle_vertex_count;
 754                 start_vertex_index++)
 755             {
 756                 float x1_float = framebuffer_coordinates[start_vertex_index].x;
 757                 float y1_float = framebuffer_coordinates[start_vertex_index].y;
 758                 float x2_float = framebuffer_coordinates[end_vertex_index].x;
 759                 float y2_float = framebuffer_coordinates[end_vertex_index].y;
 760                 [[gnu::unused]] float x3_float = framebuffer_coordinates[other_vertex_index].x;
 761                 [[gnu::unused]] float y3_float = framebuffer_coordinates[other_vertex_index].y;
 762                 auto x1_fixed = static_cast<Edge_equation_integer_type>(x1_float * scale);
 763                 auto y1_fixed = static_cast<Edge_equation_integer_type>(y1_float * scale);
 764                 auto x2_fixed = static_cast<Edge_equation_integer_type>(x2_float * scale);
 765                 auto y2_fixed = static_cast<Edge_equation_integer_type>(y2_float * scale);
 766                 [[gnu::unused]] auto x3_fixed =
 767                     static_cast<Edge_equation_integer_type>(x3_float * scale);
 768                 [[gnu::unused]] auto y3_fixed =
 769                     static_cast<Edge_equation_integer_type>(y3_float * scale);
 770                 Edge_equation_integer_type a;
 771                 Edge_equation_integer_type b;
 772                 Edge_equation_integer_type c;
 773                 {
 774                     // solve a * x1 + b * y1 + c == 0 &&
 775                     // a * x2 + b * y2 + c == 0 &&
 776                     // a * x3 + b * y3 + c >= 0
 777                     if(x1_fixed == x2_fixed && y1_fixed == y2_fixed)
 778                     {
 779                         // rounded to a zero-area triangle
 780                         skip_triangle = true;
 781                         break;
 782                     }
 783                     Edge_equation_integer_type a_fixed = (y1_fixed - y2_fixed) * scale;
 784                     Edge_equation_integer_type b_fixed = (x2_fixed - x1_fixed) * scale;
 785                     Edge_equation_integer_type c_fixed =
 786                         (x1_fixed * y2_fixed - x2_fixed * y1_fixed);
 787
 788                     // offset to end up checking at pixel center instead of top-left pixel corner
 789                     c_fixed += (a_fixed + b_fixed) / 2;
 790
 791                     a = a_fixed;
 792                     b = b_fixed;
 793                     c = c_fixed;
 794                     if(orientation > 0)
 795                     {
 796                         // fix sign
 797                         a = -a;
 798                         b = -b;
 799                         c = -c;
 800                     }
 801                 }
 802                 // handle top-left fill rule
 803                 if(a < 0 || (a == 0 && b < 0))
 804                 {
 805                     // not a top-left edge, fixup c
 806                     // effectively changes the '>=' to '>' in Edge_equation::inside
 807                     c--;
 808                 }
 809
 810                 edge_equations[start_vertex_index] = Edge_equation(a, b, c);
 811                 if(++end_vertex_index >= triangle_vertex_count)
 812                     end_vertex_index = 0;
 813                 if(++other_vertex_index >= triangle_vertex_count)
 814                     other_vertex_index = 0;
 815             }
 816             if(skip_triangle)
 817                 continue;
 818             auto fs = this->fragment_shader_function;
 819             for(std::int32_t y = min_y; y < end_y; y++)
 820             {
 821                 for(std::int32_t x = min_x; x < end_x; x++)
 822                 {
 823                     bool inside = true;
 824                     for(auto &edge_equation : edge_equations)
 825                     {
 826                         inside &= edge_equation.inside(x, y);
 827                     }
 828                     if(inside)
 829                     {
 830                         auto *pixel = reinterpret_cast<Pixel_type *>(
 831                             static_cast<unsigned char *>(color_attachment_memory)
 832                             + (static_cast<std::size_t>(x) * color_attachment_pixel_size
 833                                + static_cast<std::size_t>(y) * color_attachment_stride));
 834                         fs(pixel);
 835                     }
 836                 }
 837             }
 838         };
 839     }
 840 }
 841
 842 std::unique_ptr<Graphics_pipeline> Graphics_pipeline::make(
 843     Pipeline_cache *pipeline_cache, const VkGraphicsPipelineCreateInfo &create_info)
 844 {
 845     assert(create_info.sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
 846     auto *render_pass = Render_pass::from_handle(create_info.renderPass);
 847     assert(render_pass);
 848     auto *pipeline_layout = Pipeline_layout::from_handle(create_info.layout);
 849     assert(pipeline_layout);
 850     if(create_info.flags & VK_PIPELINE_CREATE_DERIVATIVE_BIT)
 851     {
 852 #warning implement creating derived pipelines
 853         throw std::runtime_error("creating derived pipelines is not implemented");
 854     }
 855     auto implementation = std::make_shared<Implementation>();
 856     auto optimization_level = ::LLVMCodeGenLevelDefault;
 857     if(create_info.flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
 858         optimization_level = ::LLVMCodeGenLevelNone;
 859     auto llvm_target_machine =
 860         llvm_wrapper::Target_machine::create_native_target_machine(optimization_level);
 861     implementation->compiled_shaders.reserve(create_info.stageCount);
 862     util::Enum_set<spirv::Execution_model> found_shader_stages;
 863     for(std::size_t i = 0; i < create_info.stageCount; i++)
 864     {
 865         auto &stage_info = create_info.pStages[i];
 866         auto execution_models =
 867             vulkan::get_execution_models_from_shader_stage_flags(stage_info.stage);
 868         assert(execution_models.size() == 1);
 869         auto execution_model = *execution_models.begin();
 870         bool added_to_found_shader_stages =
 871             std::get<1>(found_shader_stages.insert(execution_model));
 872         if(!added_to_found_shader_stages)
 873             throw std::runtime_error("duplicate shader stage");
 874         auto *shader_module = Shader_module::from_handle(stage_info.module);
 875         assert(shader_module);
 876         {
 877             spirv::Dump_callbacks dump_callbacks;
 878             try
 879             {
 880                 spirv::parse(dump_callbacks, shader_module->words(), shader_module->word_count());
 881             }
 882             catch(spirv::Parser_error &e)
 883             {
 884                 std::cerr << dump_callbacks.ss.str() << std::endl;
 885                 throw;
 886             }
 887             std::cerr << dump_callbacks.ss.str() << std::endl;
 888         }
 889         assert(create_info.pVertexInputState);
 890         assert(create_info.pVertexInputState->sType
 891                == VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO);
 892         auto compiled_shader = spirv_to_llvm::spirv_to_llvm(implementation->llvm_context.get(),
 893                                                             llvm_target_machine.get(),
 894                                                             shader_module->words(),
 895                                                             shader_module->word_count(),
 896                                                             implementation->compiled_shaders.size(),
 897                                                             execution_model,
 898                                                             stage_info.pName,
 899                                                             create_info.pVertexInputState);
 900         std::cerr << "Translation to LLVM succeeded." << std::endl;
 901         ::LLVMDumpModule(compiled_shader.module.get());
 902         bool failed =
 903             ::LLVMVerifyModule(compiled_shader.module.get(), ::LLVMPrintMessageAction, nullptr);
 904         if(failed)
 905             throw std::runtime_error("LLVM module verification failed");
 906         implementation->compiled_shaders.push_back(std::move(compiled_shader));
 907     }
 908     implementation->data_layout = llvm_target_machine.create_target_data_layout();
 909     implementation->jit_stack =
 910         llvm_wrapper::Orc_compile_stack::create(std::move(llvm_target_machine), optimize_module);
 911     Vertex_shader_function vertex_shader_function = nullptr;
 912     std::size_t vertex_shader_output_struct_size = 0;
 913     util::optional<std::size_t> vertex_shader_position_output_offset;
 914     Fragment_shader_function fragment_shader_function = nullptr;
 915     for(auto &compiled_shader : implementation->compiled_shaders)
 916     {
 917         implementation->jit_stack.add_eagerly_compiled_ir(
 918             std::move(compiled_shader.module),
 919             &spirv_to_llvm::Jit_symbol_resolver::resolve,
 920             static_cast<void *>(&implementation->jit_symbol_resolver));
 921         auto shader_entry_point_address = implementation->jit_stack.get_symbol_address(
 922             compiled_shader.entry_function_name.c_str());
 923         std::cerr << "shader entry: " << compiled_shader.entry_function_name << ": "
 924                   << reinterpret_cast<void *>(shader_entry_point_address) << std::endl;
 925         assert(shader_entry_point_address);
 926         switch(compiled_shader.execution_model)
 927         {
 928         case spirv::Execution_model::fragment:
 929             fragment_shader_function =
 930                 reinterpret_cast<Fragment_shader_function>(shader_entry_point_address);
 931 #warning finish implementing Graphics_pipeline::make
 932             continue;
 933 #warning finish implementing Graphics_pipeline::make
 934             throw std::runtime_error("creating fragment shaders is not implemented");
 935         case spirv::Execution_model::geometry:
 936 #warning finish implementing Graphics_pipeline::make
 937             throw std::runtime_error("creating geometry shaders is not implemented");
 938         case spirv::Execution_model::gl_compute:
 939         case spirv::Execution_model::kernel:
 940             throw std::runtime_error("can't create compute shaders from Graphics_pipeline::make");
 941         case spirv::Execution_model::tessellation_control:
 942         case spirv::Execution_model::tessellation_evaluation:
 943 #warning finish implementing Graphics_pipeline::make
 944             throw std::runtime_error("creating tessellation shaders is not implemented");
 945         case spirv::Execution_model::vertex:
 946         {
 947             vertex_shader_function =
 948                 reinterpret_cast<Vertex_shader_function>(shader_entry_point_address);
 949             implementation->vertex_shader_output_struct = compiled_shader.outputs_struct;
 950             auto llvm_vertex_shader_output_struct =
 951                 implementation->vertex_shader_output_struct->get_or_make_type().type;
 952             vertex_shader_output_struct_size = ::LLVMABISizeOfType(
 953                 implementation->data_layout.get(), llvm_vertex_shader_output_struct);
 954             for(auto &member : implementation->vertex_shader_output_struct->get_members(true))
 955             {
 956                 for(auto &decoration : member.decorations)
 957                 {
 958                     if(decoration.value == spirv::Decoration::built_in)
 959                     {
 960                         auto &builtin =
 961                             util::get<spirv::Decoration_built_in_parameters>(decoration.parameters);
 962                         if(builtin.built_in == spirv::Built_in::position)
 963                         {
 964                             vertex_shader_position_output_offset =
 965                                 ::LLVMOffsetOfElement(implementation->data_layout.get(),
 966                                                       llvm_vertex_shader_output_struct,
 967                                                       member.llvm_member_index);
 968                             break;
 969                         }
 970                     }
 971                 }
 972                 if(vertex_shader_position_output_offset)
 973                     break;
 974                 if(auto *struct_type =
 975                        dynamic_cast<spirv_to_llvm::Struct_type_descriptor *>(member.type.get()))
 976                 {
 977                     std::size_t struct_offset =
 978                         ::LLVMOffsetOfElement(implementation->data_layout.get(),
 979                                               llvm_vertex_shader_output_struct,
 980                                               member.llvm_member_index);
 981                     auto llvm_struct_type = struct_type->get_or_make_type().type;
 982                     for(auto &submember : struct_type->get_members(true))
 983                     {
 984                         for(auto &decoration : submember.decorations)
 985                         {
 986                             if(decoration.value == spirv::Decoration::built_in)
 987                             {
 988                                 auto &builtin = util::get<spirv::Decoration_built_in_parameters>(
 989                                     decoration.parameters);
 990                                 if(builtin.built_in == spirv::Built_in::position)
 991                                 {
 992                                     vertex_shader_position_output_offset =
 993                                         struct_offset
 994                                         + ::LLVMOffsetOfElement(implementation->data_layout.get(),
 995                                                                 llvm_struct_type,
 996                                                                 submember.llvm_member_index);
 997                                     break;
 998                                 }
 999                             }
1000                         }
1001                         if(vertex_shader_position_output_offset)
1002                             break;
1003                     }
1004                 }
1005                 if(vertex_shader_position_output_offset)
1006                     break;
1007             }
1008             if(!vertex_shader_position_output_offset)
1009                 throw std::runtime_error("can't find vertex shader Position output");
1010 #warning finish implementing Graphics_pipeline::make
1011             continue;
1012         }
1013         }
1014         throw std::runtime_error("unknown shader kind");
1015     }
1016 #warning finish implementing Graphics_pipeline::make
1017     if(!vertex_shader_function)
1018         throw std::runtime_error("graphics pipeline doesn't have vertex shader");
1019     if(!create_info.pViewportState)
1020         throw std::runtime_error("missing viewport state");
1021     if(create_info.pViewportState->viewportCount != 1)
1022         throw std::runtime_error("unimplemented viewport count");
1023     if(!create_info.pViewportState->pViewports)
1024         throw std::runtime_error("missing viewport list");
1025     if(!create_info.pViewportState->pScissors)
1026         throw std::runtime_error("missing scissor rectangle list");
1027     assert(vertex_shader_position_output_offset);
1028     return std::unique_ptr<Graphics_pipeline>(
1029         new Graphics_pipeline(std::move(implementation),
1030                               vertex_shader_function,
1031                               vertex_shader_output_struct_size,
1032                               *vertex_shader_position_output_offset,
1033                               fragment_shader_function,
1034                               create_info.pViewportState->pViewports[0],
1035                               create_info.pViewportState->pScissors[0]));
1036 }
1037 }
1038 }