src/pipeline/pipeline.cpp

   1 /*
   2  * Copyright 2017 Jacob Lifshay
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in all
  12  * copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  *
  22  */
   23 #include "pipeline.h"
   24 #include "spirv_to_llvm/spirv_to_llvm.h"
   25 #include "llvm_wrapper/llvm_wrapper.h"
   26 #include "llvm_wrapper/orc_compile_stack.h"
   27 #include "vulkan/util.h"
   28 #include "util/soft_float.h"
   29 #include "json/json.h"
   30 #include <stdexcept>
   31 #include <cassert>
   #include <cmath>
   #include <cstring>
   32 #include <vector>
   33 #include <iostream>
  34
  35 namespace vulkan_cpu
  36 {
  37 namespace pipeline
  38 {
   // Pipeline cache object. It is currently an empty placeholder with no members;
   // instances are destroyed through Api_object_deleter<Pipeline_cache>.
   39 class Pipeline_cache
   40 {
   41 };
  42
   // Deleter call operator for Pipeline_cache: destroys the object with `delete`.
   // It is defined in this file, where Pipeline_cache is a complete type.
   // Passing a null pointer is harmless, since `delete` on a null pointer does nothing.
   43 void Api_object_deleter<Pipeline_cache>::operator()(Pipeline_cache *pipeline_cache) const noexcept
   44 {
   45     delete pipeline_cache;
   46 }
  47
   // Render pass object. It is currently an empty placeholder with no members;
   // instances are created by Render_pass_handle::make and destroyed through
   // Api_object_deleter<Render_pass>.
   48 class Render_pass
   49 {
   50 };
  51
   // Deleter call operator for Render_pass: destroys the object with `delete`.
   // It is defined in this file, where Render_pass is a complete type.
   // Passing a null pointer is harmless, since `delete` on a null pointer does nothing.
   52 void Api_object_deleter<Render_pass>::operator()(Render_pass *render_pass) const noexcept
   53 {
   54     delete render_pass;
   55 }
  56
   // Creates a new Render_pass and returns it wrapped in a Render_pass_handle.
   // The implementation is unfinished (see the #warning): render_pass_create_info is
   // not read yet, so every call produces a default-constructed Render_pass.
   // NOTE(review): the handle presumably takes ownership of the allocated object and
   // releases it through Api_object_deleter<Render_pass> -- confirm in pipeline.h.
   57 template <>
   58 Render_pass_handle Render_pass_handle::make(const VkRenderPassCreateInfo &render_pass_create_info)
   59 {
   60 #warning finish implementing Render_pass_handle::make
   61     return Render_pass_handle(new Render_pass());
   62 }
  63
   // Pipeline layout object. It is currently an empty placeholder with no members;
   // instances are created by Pipeline_layout_handle::make and destroyed through
   // Api_object_deleter<Pipeline_layout>.
   64 class Pipeline_layout
   65 {
   66 };
  67
   // Deleter call operator for Pipeline_layout: destroys the object with `delete`.
   // It is defined in this file, where Pipeline_layout is a complete type.
   // Passing a null pointer is harmless, since `delete` on a null pointer does nothing.
   68 void Api_object_deleter<Pipeline_layout>::operator()(Pipeline_layout *pipeline_layout) const
   69     noexcept
   70 {
   71     delete pipeline_layout;
   72 }
  73
   // Creates a new Pipeline_layout and returns it wrapped in a Pipeline_layout_handle.
   // The implementation is unfinished (see the #warning): pipeline_layout_create_info
   // is not read yet, so every call produces a default-constructed Pipeline_layout.
   // NOTE(review): the handle presumably takes ownership of the allocated object and
   // releases it through Api_object_deleter<Pipeline_layout> -- confirm in pipeline.h.
   74 template <>
   75 Pipeline_layout_handle Pipeline_layout_handle::make(
   76     const VkPipelineLayoutCreateInfo &pipeline_layout_create_info)
   77 {
   78 #warning finish implementing Pipeline_layout_handle::make
   79     return Pipeline_layout_handle(new Pipeline_layout());
   80 }
  81
  82 llvm_wrapper::Module Pipeline::optimize_module(llvm_wrapper::Module module,
  83                                                ::LLVMTargetMachineRef target_machine)
  84 {
  85     switch(llvm_wrapper::Target_machine::get_code_gen_opt_level(target_machine))
  86     {
  87     case ::LLVMCodeGenLevelNone:
  88     case ::LLVMCodeGenLevelLess:
  89         break;
  90     case ::LLVMCodeGenLevelDefault:
  91     case ::LLVMCodeGenLevelAggressive:
  92     {
  93 #warning finish implementing module optimizations
  94         {
  95             auto manager = llvm_wrapper::Pass_manager::create_function_pass_manager(module.get());
  96             ::LLVMAddAnalysisPasses(target_machine, manager.get());
  97             ::LLVMAddPromoteMemoryToRegisterPass(manager.get());
  98             ::LLVMAddScalarReplAggregatesPass(manager.get());
  99             ::LLVMAddScalarizerPass(manager.get());
 100             ::LLVMAddEarlyCSEMemSSAPass(manager.get());
 101             ::LLVMAddSCCPPass(manager.get());
 102             ::LLVMAddAggressiveDCEPass(manager.get());
 103             ::LLVMAddLICMPass(manager.get());
 104             ::LLVMAddIndVarSimplifyPass(manager.get());
 105             ::LLVMAddCFGSimplificationPass(manager.get());
 106             ::LLVMAddReassociatePass(manager.get());
 107             ::LLVMAddInstructionCombiningPass(manager.get());
 108             ::LLVMAddNewGVNPass(manager.get());
 109             ::LLVMAddCorrelatedValuePropagationPass(manager.get());
 110             ::LLVMInitializeFunctionPassManager(manager.get());
 111             for(auto fn = ::LLVMGetFirstFunction(module.get()); fn; fn = ::LLVMGetNextFunction(fn))
 112                 ::LLVMRunFunctionPassManager(manager.get(), fn);
 113             ::LLVMFinalizeFunctionPassManager(manager.get());
 114         }
 115         {
 116             auto manager = llvm_wrapper::Pass_manager::create_module_pass_manager();
 117             ::LLVMAddAnalysisPasses(target_machine, manager.get());
 118             ::LLVMAddIPSCCPPass(manager.get());
 119             ::LLVMAddFunctionInliningPass(manager.get());
 120             ::LLVMAddDeadArgEliminationPass(manager.get());
 121             ::LLVMAddGlobalDCEPass(manager.get());
 122             ::LLVMRunPassManager(manager.get(), module.get());
 123         }
 124         {
 125             auto manager = llvm_wrapper::Pass_manager::create_function_pass_manager(module.get());
 126             ::LLVMAddAnalysisPasses(target_machine, manager.get());
 127             ::LLVMAddCFGSimplificationPass(manager.get());
 128             ::LLVMAddPromoteMemoryToRegisterPass(manager.get());
 129             ::LLVMAddScalarReplAggregatesPass(manager.get());
 130             ::LLVMAddLICMPass(manager.get());
 131             ::LLVMAddIndVarSimplifyPass(manager.get());
 132             ::LLVMAddReassociatePass(manager.get());
 133             ::LLVMAddInstructionCombiningPass(manager.get());
 134             ::LLVMAddLoopUnrollPass(manager.get());
 135             ::LLVMAddSLPVectorizePass(manager.get());
 136             ::LLVMAddAggressiveDCEPass(manager.get());
 137             ::LLVMInitializeFunctionPassManager(manager.get());
 138             for(auto fn = ::LLVMGetFirstFunction(module.get()); fn; fn = ::LLVMGetNextFunction(fn))
 139                 ::LLVMRunFunctionPassManager(manager.get(), fn);
 140             ::LLVMFinalizeFunctionPassManager(manager.get());
 141         }
 142         std::cerr << "optimized module:" << std::endl;
 143         ::LLVMDumpModule(module.get());
 144         break;
 145     }
 146     }
 147     return module;
 148 }
 149
 150 struct Graphics_pipeline::Implementation
 151 {
 152     llvm_wrapper::Context llvm_context = llvm_wrapper::Context::create();
 153     spirv_to_llvm::Jit_symbol_resolver jit_symbol_resolver;
 154     llvm_wrapper::Orc_compile_stack jit_stack;
 155     llvm_wrapper::Target_data data_layout;
 156     std::vector<spirv_to_llvm::Converted_module> compiled_shaders;
 157     std::shared_ptr<spirv_to_llvm::Struct_type_descriptor> vertex_shader_output_struct;
 158     std::string append_value_to_string(std::string str,
 159                                        spirv_to_llvm::Type_descriptor &type,
 160                                        const void *value) const
 161     {
 162         struct Visitor : public spirv_to_llvm::Type_descriptor::Type_visitor
 163         {
 164             const Implementation *this_;
 165             std::string &str;
 166             const void *value;
 167             Visitor(const Implementation *this_, std::string &str, const void *value) noexcept
 168                 : this_(this_),
 169                   str(str),
 170                   value(value)
 171             {
 172             }
 173             virtual void visit(spirv_to_llvm::Simple_type_descriptor &type) override
 174             {
 175                 auto llvm_type = type.get_or_make_type().type;
 176                 switch(::LLVMGetTypeKind(llvm_type))
 177                 {
 178                 case ::LLVMVoidTypeKind:
 179                 case ::LLVMX86_FP80TypeKind:
 180                 case ::LLVMFP128TypeKind:
 181                 case ::LLVMPPC_FP128TypeKind:
 182                 case ::LLVMLabelTypeKind:
 183                 case ::LLVMFunctionTypeKind:
 184                 case ::LLVMStructTypeKind:
 185                 case ::LLVMArrayTypeKind:
 186                 case ::LLVMPointerTypeKind:
 187                 case ::LLVMVectorTypeKind:
 188                 case ::LLVMMetadataTypeKind:
 189                 case ::LLVMX86_MMXTypeKind:
 190                 case ::LLVMTokenTypeKind:
 191                     break;
 192                 case ::LLVMHalfTypeKind:
 193                 {
 194                     auto integer_value = *static_cast<const std::uint16_t *>(value);
 195                     auto float_value =
 196                         util::soft_float::ExtendedFloat::fromHalfPrecision(integer_value);
 197                     str = json::ast::Number_value::append_double_to_string(
 198                         static_cast<double>(float_value), std::move(str));
 199                     if(float_value.isNaN())
 200                     {
 201                         str += " (0x";
 202                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 203                             integer_value, std::move(str), 0x10);
 204                         str += ")";
 205                     }
 206                     return;
 207                 }
 208                 case ::LLVMFloatTypeKind:
 209                 {
 210                     static_assert(sizeof(std::uint32_t) == sizeof(float)
 211                                       && alignof(std::uint32_t) == alignof(float),
 212                                   "");
 213                     union
 214                     {
 215                         std::uint32_t integer_value;
 216                         float float_value;
 217                     };
 218                     integer_value = *static_cast<const std::uint32_t *>(value);
 219                     str = json::ast::Number_value::append_double_to_string(float_value,
 220                                                                            std::move(str));
 221                     if(std::isnan(float_value))
 222                     {
 223                         str += " (0x";
 224                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 225                             integer_value, std::move(str), 0x10);
 226                         str += ")";
 227                     }
 228                     return;
 229                 }
 230                 case ::LLVMDoubleTypeKind:
 231                 {
 232                     static_assert(sizeof(std::uint64_t) == sizeof(double)
 233                                       && alignof(std::uint64_t) == alignof(double),
 234                                   "");
 235                     union
 236                     {
 237                         std::uint64_t integer_value;
 238                         double float_value;
 239                     };
 240                     integer_value = *static_cast<const std::uint64_t *>(value);
 241                     str = json::ast::Number_value::append_double_to_string(float_value,
 242                                                                            std::move(str));
 243                     if(std::isnan(float_value))
 244                     {
 245                         str += " (0x";
 246                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 247                             integer_value, std::move(str), 0x10);
 248                         str += ")";
 249                     }
 250                     return;
 251                 }
 252                 case ::LLVMIntegerTypeKind:
 253                 {
 254                     switch(::LLVMGetIntTypeWidth(llvm_type))
 255                     {
 256                     case 8:
 257                     {
 258                         auto integer_value = *static_cast<const std::uint8_t *>(value);
 259                         str += "0x";
 260                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 261                             integer_value, std::move(str), 0x10);
 262                         str += " ";
 263                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 264                             integer_value, std::move(str));
 265                         str += " ";
 266                         str = json::ast::Number_value::append_signed_integer_to_string(
 267                             static_cast<std::int8_t>(integer_value), std::move(str));
 268                         return;
 269                     }
 270                     case 16:
 271                     {
 272                         auto integer_value = *static_cast<const std::uint16_t *>(value);
 273                         str += "0x";
 274                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 275                             integer_value, std::move(str), 0x10);
 276                         str += " ";
 277                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 278                             integer_value, std::move(str));
 279                         str += " ";
 280                         str = json::ast::Number_value::append_signed_integer_to_string(
 281                             static_cast<std::int16_t>(integer_value), std::move(str));
 282                         return;
 283                     }
 284                     case 32:
 285                     {
 286                         auto integer_value = *static_cast<const std::uint32_t *>(value);
 287                         str += "0x";
 288                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 289                             integer_value, std::move(str), 0x10);
 290                         str += " ";
 291                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 292                             integer_value, std::move(str));
 293                         str += " ";
 294                         str = json::ast::Number_value::append_signed_integer_to_string(
 295                             static_cast<std::int32_t>(integer_value), std::move(str));
 296                         return;
 297                     }
 298                     case 64:
 299                     {
 300                         auto integer_value = *static_cast<const std::uint64_t *>(value);
 301                         str += "0x";
 302                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 303                             integer_value, std::move(str), 0x10);
 304                         str += " ";
 305                         str = json::ast::Number_value::append_unsigned_integer_to_string(
 306                             integer_value, std::move(str));
 307                         str += " ";
 308                         str = json::ast::Number_value::append_signed_integer_to_string(
 309                             static_cast<std::int64_t>(integer_value), std::move(str));
 310                         return;
 311                     }
 312                     }
 313                     break;
 314                 }
 315                 }
 316                 assert(!"unhandled type");
 317                 throw std::runtime_error("unhandled type");
 318             }
 319             virtual void visit(spirv_to_llvm::Vector_type_descriptor &type) override
 320             {
 321                 auto llvm_element_type = type.get_element_type()->get_or_make_type().type;
 322                 std::size_t element_size =
 323                     ::LLVMABISizeOfType(this_->data_layout.get(), llvm_element_type);
 324                 std::size_t element_count = type.get_element_count();
 325                 str += "<";
 326                 auto separator = "";
 327                 for(std::size_t i = 0; i < element_count; i++)
 328                 {
 329                     str += separator;
 330                     separator = ", ";
 331                     str = this_->append_value_to_string(
 332                         std::move(str),
 333                         *type.get_element_type(),
 334                         static_cast<const char *>(value) + i * element_size);
 335                 }
 336                 str += ">";
 337             }
 338             virtual void visit(spirv_to_llvm::Matrix_type_descriptor &type) override
 339             {
 340                 assert(!"dumping matrix not implemented");
 341                 throw std::runtime_error("dumping matrix not implemented");
 342 #warning dumping matrix not implemented
 343             }
 344             virtual void visit(spirv_to_llvm::Array_type_descriptor &type) override
 345             {
 346                 auto llvm_element_type = type.get_element_type()->get_or_make_type().type;
 347                 std::size_t element_size =
 348                     ::LLVMABISizeOfType(this_->data_layout.get(), llvm_element_type);
 349                 std::size_t element_count = type.get_element_count();
 350                 str += "[";
 351                 auto separator = "";
 352                 for(std::size_t i = 0; i < element_count; i++)
 353                 {
 354                     str += separator;
 355                     separator = ", ";
 356                     str = this_->append_value_to_string(
 357                         std::move(str),
 358                         *type.get_element_type(),
 359                         static_cast<const char *>(value) + i * element_size);
 360                 }
 361                 str += "]";
 362             }
 363             virtual void visit(spirv_to_llvm::Pointer_type_descriptor &type) override
 364             {
 365                 str += "pointer:0x";
 366                 str = json::ast::Number_value::append_unsigned_integer_to_string(
 367                     reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value)),
 368                     std::move(str),
 369                     0x10);
 370             }
 371             virtual void visit(spirv_to_llvm::Function_type_descriptor &type) override
 372             {
 373                 str += "function:0x";
 374                 str = json::ast::Number_value::append_unsigned_integer_to_string(
 375                     reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value)),
 376                     std::move(str),
 377                     0x10);
 378             }
 379             virtual void visit(spirv_to_llvm::Struct_type_descriptor &type) override
 380             {
 381                 auto &&members = type.get_members(true);
 382                 auto llvm_type = type.get_or_make_type().type;
 383                 str += "{";
 384                 auto separator = "";
 385                 for(auto &member : members)
 386                 {
 387                     str += separator;
 388                     separator = ", ";
 389                     str = this_->append_value_to_string(
 390                         std::move(str),
 391                         *member.type,
 392                         static_cast<const char *>(value)
 393                             + ::LLVMOffsetOfElement(
 394                                   this_->data_layout.get(), llvm_type, member.llvm_member_index));
 395                 }
 396                 str += "}";
 397             }
 398         };
 399         type.visit(Visitor(this, str, value));
 400         return str;
 401     }
 402 };
 403
 404 void Graphics_pipeline::dump_vertex_shader_output_struct(const void *output_struct) const
 405 {
 406     std::cerr << "output: "
 407               << implementation->append_value_to_string(
 408                      {}, *implementation->vertex_shader_output_struct, output_struct)
 409               << std::endl;
 410 }
 411
 412 void Graphics_pipeline::run(std::uint32_t vertex_start_index,
 413                             std::uint32_t vertex_end_index,
 414                             std::uint32_t instance_id,
 415                             const image::Image &color_attachment,
 416                             void *const *bindings)
 417 {
 418     typedef std::uint32_t Pixel_type;
 419     assert(color_attachment.descriptor.tiling == VK_IMAGE_TILING_LINEAR);
 420     std::size_t color_attachment_stride = color_attachment.descriptor.get_memory_stride();
 421     std::size_t color_attachment_pixel_size = color_attachment.descriptor.get_memory_pixel_size();
 422     unsigned char *color_attachment_memory = color_attachment.memory.get();
 423     float viewport_x_scale, viewport_x_offset, viewport_y_scale, viewport_y_offset,
 424         viewport_z_scale, viewport_z_offset;
 425     {
 426         float px = viewport.width;
 427         float ox = viewport.x + 0.5f * viewport.width;
 428         float py = viewport.height;
 429         float oy = viewport.y + 0.5f * viewport.height;
 430         float pz = viewport.maxDepth - viewport.minDepth;
 431         float oz = viewport.minDepth;
 432         viewport_x_scale = px * 0.5f;
 433         viewport_x_offset = ox;
 434         viewport_y_scale = py * 0.5f;
 435         viewport_y_offset = oy;
 436         viewport_z_scale = pz;
 437         viewport_z_offset = oz;
 438     }
 439     constexpr std::size_t vec4_native_alignment = alignof(float) * 4;
 440     constexpr std::size_t max_alignment = alignof(std::max_align_t);
 441     constexpr std::size_t vec4_alignment =
 442         vec4_native_alignment > max_alignment ? max_alignment : vec4_native_alignment;
 443     constexpr std::size_t ivec4_native_alignment = alignof(std::int32_t) * 4;
 444     constexpr std::size_t ivec4_alignment =
 445         ivec4_native_alignment > max_alignment ? max_alignment : ivec4_native_alignment;
 446     struct alignas(vec4_alignment) Vec4
 447     {
 448         float x;
 449         float y;
 450         float z;
 451         float w;
 452         constexpr Vec4() noexcept : x(), y(), z(), w()
 453         {
 454         }
 455         constexpr explicit Vec4(float x, float y, float z, float w) noexcept : x(x),
 456                                                                                y(y),
 457                                                                                z(z),
 458                                                                                w(w)
 459         {
 460         }
 461     };
 462     struct alignas(ivec4_alignment) Ivec4
 463     {
 464         std::int32_t x;
 465         std::int32_t y;
 466         std::int32_t z;
 467         std::int32_t w;
 468         constexpr Ivec4() noexcept : x(), y(), z(), w()
 469         {
 470         }
 471         constexpr explicit Ivec4(std::int32_t x,
 472                                  std::int32_t y,
 473                                  std::int32_t z,
 474                                  std::int32_t w) noexcept : x(x),
 475                                                             y(y),
 476                                                             z(z),
 477                                                             w(w)
 478         {
 479         }
 480     };
 481     auto interpolate_float = [](float t, float v0, float v1) noexcept->float
 482     {
 483         return t * v1 + (1.0f - t) * v0;
 484     };
 485     auto interpolate_vec4 = [interpolate_float](
 486                                 float t, const Vec4 &v0, const Vec4 &v1) noexcept->Vec4
 487     {
 488         return Vec4(interpolate_float(t, v0.x, v1.x),
 489                     interpolate_float(t, v0.y, v1.y),
 490                     interpolate_float(t, v0.z, v1.z),
 491                     interpolate_float(t, v0.w, v1.w));
 492     };
 493     static constexpr std::size_t triangle_vertex_count = 3;
 494     struct Triangle
 495     {
 496         Vec4 vertexes[triangle_vertex_count];
 497         constexpr Triangle() noexcept : vertexes{}
 498         {
 499         }
 500         constexpr Triangle(const Vec4 &v0, const Vec4 &v1, const Vec4 &v2) noexcept
 501             : vertexes{v0, v1, v2}
 502         {
 503         }
 504     };
 505     auto solve_for_t = [](float v0, float v1) noexcept->float
 506     {
 507         // solves interpolate_float(t, v0, v1) == 0
 508         return v0 / (v0 - v1);
 509     };
 510     auto clip_edge = [solve_for_t, interpolate_vec4](const Vec4 &start_vertex,
 511                                                      const Vec4 &end_vertex,
 512                                                      Vec4 *output_vertexes,
 513                                                      std::size_t &output_vertex_count,
 514                                                      auto eval_vertex) -> bool
 515     {
 516         // eval_vertex returns a non-negative number if the vertex is inside the clip volume
 517         float start_vertex_signed_distance = eval_vertex(start_vertex);
 518         float end_vertex_signed_distance = eval_vertex(end_vertex);
 519         if(start_vertex_signed_distance != start_vertex_signed_distance)
 520             return false; // triangle has a NaN coordinate; skip it
 521         if(start_vertex_signed_distance < 0)
 522         {
 523             // start_vertex is outside
 524             if(end_vertex_signed_distance < 0)
 525             {
 526                 // end_vertex is outside; do nothing
 527             }
 528             else
 529             {
 530                 // end_vertex is inside
 531                 output_vertexes[output_vertex_count++] = interpolate_vec4(
 532                     solve_for_t(start_vertex_signed_distance, end_vertex_signed_distance),
 533                     start_vertex,
 534                     end_vertex);
 535                 output_vertexes[output_vertex_count++] = end_vertex;
 536             }
 537         }
 538         else
 539         {
 540             // start_vertex is inside
 541             if(end_vertex_signed_distance < 0)
 542             {
 543                 // end_vertex is outside
 544                 output_vertexes[output_vertex_count++] = interpolate_vec4(
 545                     solve_for_t(start_vertex_signed_distance, end_vertex_signed_distance),
 546                     start_vertex,
 547                     end_vertex);
 548             }
 549             else
 550             {
 551                 // end_vertex is inside
 552                 output_vertexes[output_vertex_count++] = end_vertex;
 553             }
 554         }
 555         return true;
 556     };
 557     auto clip_triangles = [clip_edge](
 558         std::vector<Triangle> &triangles, std::vector<Triangle> &temp_triangles, auto eval_vertex)
 559     {
 560         temp_triangles.clear();
 561         for(auto &input_ref : triangles)
 562         {
 563             Triangle input = input_ref; // copy to enable compiler optimizations
 564             constexpr std::size_t max_clipped_output_vertex_count = 4;
 565             Vec4 output_vertexes[max_clipped_output_vertex_count];
 566             std::size_t output_vertex_count = 0;
 567             bool skip_triangle = false;
 568             std::size_t end_vertex_index = 1;
 569             for(std::size_t start_vertex_index = 0; start_vertex_index < triangle_vertex_count;
 570                 start_vertex_index++)
 571             {
 572                 if(!clip_edge(input.vertexes[start_vertex_index],
 573                               input.vertexes[end_vertex_index],
 574                               output_vertexes,
 575                               output_vertex_count,
 576                               eval_vertex))
 577                 {
 578                     skip_triangle = true;
 579                     break;
 580                 }
 581                 if(++end_vertex_index >= triangle_vertex_count)
 582                     end_vertex_index = 0;
 583             }
 584             if(skip_triangle)
 585                 continue;
 586             switch(output_vertex_count)
 587             {
 588             case 0:
 589             case 1:
 590             case 2:
 591                 continue;
 592             case 3:
 593                 temp_triangles.push_back(
 594                     Triangle(output_vertexes[0], output_vertexes[1], output_vertexes[2]));
 595                 continue;
 596             case 4:
 597                 temp_triangles.push_back(
 598                     Triangle(output_vertexes[0], output_vertexes[1], output_vertexes[2]));
 599                 temp_triangles.push_back(
 600                     Triangle(output_vertexes[0], output_vertexes[2], output_vertexes[3]));
 601                 continue;
 602             }
 603             assert(!"clipping algorithm failed");
 604         }
 605         temp_triangles.swap(triangles);
 606     };
 607     std::vector<Triangle> triangles;
 608     std::vector<Triangle> temp_triangles;
 609     constexpr std::size_t chunk_max_size = 96;
 610     static_assert(chunk_max_size % triangle_vertex_count == 0, "");
 611     std::unique_ptr<unsigned char[]> chunk_vertex_buffer(
 612         new unsigned char[get_vertex_shader_output_struct_size() * chunk_max_size]);
 613     while(vertex_start_index < vertex_end_index)
 614     {
 615         std::uint32_t chunk_size = vertex_end_index - vertex_start_index;
 616         if(chunk_size > chunk_max_size)
 617             chunk_size = chunk_max_size;
 618         auto current_vertex_start_index = vertex_start_index;
 619         vertex_start_index += chunk_size;
 620         run_vertex_shader(current_vertex_start_index,
 621                           current_vertex_start_index + chunk_size,
 622                           instance_id,
 623                           chunk_vertex_buffer.get(),
 624                           bindings);
 625         const unsigned char *current_vertex =
 626             chunk_vertex_buffer.get() + vertex_shader_position_output_offset;
 627         triangles.clear();
 628         for(std::uint32_t i = 0; i + triangle_vertex_count <= chunk_size;
 629             i += triangle_vertex_count)
 630         {
 631             Triangle triangle;
 632             for(std::size_t j = 0; j < triangle_vertex_count; j++)
 633             {
 634                 triangle.vertexes[j] = *reinterpret_cast<const Vec4 *>(current_vertex);
 635                 current_vertex += vertex_shader_output_struct_size;
 636             }
 637             triangles.push_back(triangle);
 638         }
 639         // clip to 0 <= vertex.z
 640         clip_triangles(triangles,
 641                        temp_triangles,
 642                        [](const Vec4 &vertex) noexcept->float
 643                        {
 644                            return vertex.z;
 645                        });
 646         // clip to vertex.z <= vertex.w
 647         clip_triangles(triangles,
 648                        temp_triangles,
 649                        [](const Vec4 &vertex) noexcept->float
 650                        {
 651                            return vertex.w - vertex.z;
 652                        });
 653         // clip to -vertex.w <= vertex.x
 654         clip_triangles(triangles,
 655                        temp_triangles,
 656                        [](const Vec4 &vertex) noexcept->float
 657                        {
 658                            return vertex.x + vertex.w;
 659                        });
 660         // clip to vertex.x <= vertex.w
 661         clip_triangles(triangles,
 662                        temp_triangles,
 663                        [](const Vec4 &vertex) noexcept->float
 664                        {
 665                            return vertex.w - vertex.x;
 666                        });
 667         // clip to -vertex.w <= vertex.y
 668         clip_triangles(triangles,
 669                        temp_triangles,
 670                        [](const Vec4 &vertex) noexcept->float
 671                        {
 672                            return vertex.y + vertex.w;
 673                        });
 674         // clip to vertex.y <= vertex.w
 675         clip_triangles(triangles,
 676                        temp_triangles,
 677                        [](const Vec4 &vertex) noexcept->float
 678                        {
 679                            return vertex.w - vertex.y;
 680                        });
 681         VkOffset2D clipped_scissor_rect_min = scissor_rect.offset;
 682         VkOffset2D clipped_scissor_rect_end = {
 683             .x = scissor_rect.offset.x + static_cast<std::int32_t>(scissor_rect.extent.width),
 684             .y = scissor_rect.offset.y + static_cast<std::int32_t>(scissor_rect.extent.height),
 685         };
 686         if(clipped_scissor_rect_min.x < 0)
 687             clipped_scissor_rect_min.x = 0;
 688         if(clipped_scissor_rect_min.y < 0)
 689             clipped_scissor_rect_min.y = 0;
 690         if(clipped_scissor_rect_end.x > color_attachment.descriptor.extent.width)
 691             clipped_scissor_rect_end.x = color_attachment.descriptor.extent.width;
 692         if(clipped_scissor_rect_end.y < color_attachment.descriptor.extent.height)
 693             clipped_scissor_rect_end.y = color_attachment.descriptor.extent.height;
 694         if(clipped_scissor_rect_end.x <= clipped_scissor_rect_min.x)
 695             continue;
 696         if(clipped_scissor_rect_end.y <= clipped_scissor_rect_min.y)
 697             continue;
 698         for(std::size_t triangle_index = 0; triangle_index < triangles.size(); triangle_index++)
 699         {
 700             Triangle triangle = triangles[triangle_index];
 701             Vec4 projected_triangle_and_inv_w[triangle_vertex_count];
 702             Vec4 framebuffer_coordinates[triangle_vertex_count];
 703             for(std::size_t i = 0; i < triangle_vertex_count; i++)
 704             {
 705                 projected_triangle_and_inv_w[i].w = 1.0f / triangle.vertexes[i].w;
 706                 projected_triangle_and_inv_w[i].x =
 707                     triangle.vertexes[i].x * projected_triangle_and_inv_w[i].w;
 708                 projected_triangle_and_inv_w[i].y =
 709                     triangle.vertexes[i].y * projected_triangle_and_inv_w[i].w;
 710                 projected_triangle_and_inv_w[i].z =
 711                     triangle.vertexes[i].z * projected_triangle_and_inv_w[i].w;
 712                 framebuffer_coordinates[i] =
 713                     Vec4(projected_triangle_and_inv_w[i].x * viewport_x_scale + viewport_x_offset,
 714                          projected_triangle_and_inv_w[i].y * viewport_y_scale + viewport_y_offset,
 715                          projected_triangle_and_inv_w[i].z * viewport_z_scale + viewport_z_offset,
 716                          0);
 717             }
 718             float orientation = 0;
 719             for(std::size_t start_vertex_index = 0, end_vertex_index = 1;
 720                 start_vertex_index < triangle_vertex_count;
 721                 start_vertex_index++)
 722             {
 723                 float x1 = framebuffer_coordinates[start_vertex_index].x;
 724                 float y1 = framebuffer_coordinates[start_vertex_index].y;
 725                 float x2 = framebuffer_coordinates[end_vertex_index].x;
 726                 float y2 = framebuffer_coordinates[end_vertex_index].y;
 727                 orientation += x2 * y1 - x1 * y2;
 728                 if(++end_vertex_index >= triangle_vertex_count)
 729                     end_vertex_index = 0;
 730             }
 731             if(!(orientation < 0)
 732                && !(orientation > 0)) // zero area triangle or triangle coordinate is NaN
 733                 continue;
 734             // orientation > 0 for counter-clockwise triangle
 735             // orientation < 0 for clockwise triangle
 736             std::int32_t min_x, end_x, min_y, end_y;
 737             bool first = true;
 738             for(std::size_t i = 0; i < triangle_vertex_count; i++)
 739             {
 740                 // x and y will be >= 0 so we can use truncate instead of floor for speed
 741                 auto current_min_x = static_cast<std::int32_t>(framebuffer_coordinates[i].x);
 742                 auto current_min_y = static_cast<std::int32_t>(framebuffer_coordinates[i].y);
 743                 std::int32_t current_end_x = current_min_x + 1;
 744                 std::int32_t current_end_y = current_min_y + 1;
 745                 if(first || current_min_x < min_x)
 746                     min_x = current_min_x;
 747                 if(first || current_end_x > end_x)
 748                     end_x = current_end_x;
 749                 if(first || current_min_y < min_y)
 750                     min_y = current_min_y;
 751                 if(first || current_end_y > end_y)
 752                     end_y = current_end_y;
 753                 first = false;
 754             }
 755             if(min_x < clipped_scissor_rect_min.x)
 756                 min_x = clipped_scissor_rect_min.x;
 757             if(end_x > clipped_scissor_rect_end.x)
 758                 end_x = clipped_scissor_rect_end.x;
 759             if(min_y < clipped_scissor_rect_min.y)
 760                 min_y = clipped_scissor_rect_min.y;
 761             if(end_y > clipped_scissor_rect_end.y)
 762                 end_y = clipped_scissor_rect_end.y;
 763             constexpr int log2_scale = 16;
 764             constexpr auto scale = 1LL << log2_scale;
 765             typedef std::int64_t Edge_equation_integer_type;
 766             struct Edge_equation
 767             {
 768                 Edge_equation_integer_type a;
 769                 Edge_equation_integer_type b;
 770                 Edge_equation_integer_type c;
 771                 Edge_equation_integer_type padding;
 772                 constexpr Edge_equation() noexcept : a(), b(), c(), padding()
 773                 {
 774                 }
 775                 constexpr Edge_equation(Edge_equation_integer_type a,
 776                                         Edge_equation_integer_type b,
 777                                         Edge_equation_integer_type c) noexcept : a(a),
 778                                                                                  b(b),
 779                                                                                  c(c),
 780                                                                                  padding()
 781                 {
 782                 }
 783                 constexpr bool inside(std::int32_t x, std::int32_t y) const noexcept
 784                 {
 785                     return a * x + b * y + c >= 0;
 786                 }
 787             };
 788             Edge_equation edge_equations[triangle_vertex_count];
 789             bool skip_triangle = false;
 790             for(std::size_t start_vertex_index = 0, end_vertex_index = 1, other_vertex_index = 2;
 791                 start_vertex_index < triangle_vertex_count;
 792                 start_vertex_index++)
 793             {
 794                 float x1_float = framebuffer_coordinates[start_vertex_index].x;
 795                 float y1_float = framebuffer_coordinates[start_vertex_index].y;
 796                 float x2_float = framebuffer_coordinates[end_vertex_index].x;
 797                 float y2_float = framebuffer_coordinates[end_vertex_index].y;
 798                 [[gnu::unused]] float x3_float = framebuffer_coordinates[other_vertex_index].x;
 799                 [[gnu::unused]] float y3_float = framebuffer_coordinates[other_vertex_index].y;
 800                 auto x1_fixed = static_cast<Edge_equation_integer_type>(x1_float * scale);
 801                 auto y1_fixed = static_cast<Edge_equation_integer_type>(y1_float * scale);
 802                 auto x2_fixed = static_cast<Edge_equation_integer_type>(x2_float * scale);
 803                 auto y2_fixed = static_cast<Edge_equation_integer_type>(y2_float * scale);
 804                 [[gnu::unused]] auto x3_fixed =
 805                     static_cast<Edge_equation_integer_type>(x3_float * scale);
 806                 [[gnu::unused]] auto y3_fixed =
 807                     static_cast<Edge_equation_integer_type>(y3_float * scale);
 808                 Edge_equation_integer_type a;
 809                 Edge_equation_integer_type b;
 810                 Edge_equation_integer_type c;
 811                 {
 812                     // solve a * x1 + b * y1 + c == 0 &&
 813                     // a * x2 + b * y2 + c == 0 &&
 814                     // a * x3 + b * y3 + c >= 0
 815                     if(x1_fixed == x2_fixed && y1_fixed == y2_fixed)
 816                     {
 817                         // rounded to a zero-area triangle
 818                         skip_triangle = true;
 819                         break;
 820                     }
 821                     Edge_equation_integer_type a_fixed = (y1_fixed - y2_fixed) * scale;
 822                     Edge_equation_integer_type b_fixed = (x2_fixed - x1_fixed) * scale;
 823                     Edge_equation_integer_type c_fixed =
 824                         (x1_fixed * y2_fixed - x2_fixed * y1_fixed);
 825
 826                     // offset to end up checking at pixel center instead of top-left pixel corner
 827                     c_fixed += (a_fixed + b_fixed) / 2;
 828
 829                     a = a_fixed;
 830                     b = b_fixed;
 831                     c = c_fixed;
 832                     if(orientation > 0)
 833                     {
 834                         // fix sign
 835                         a = -a;
 836                         b = -b;
 837                         c = -c;
 838                     }
 839                 }
 840                 // handle top-left fill rule
 841                 if(a < 0 || (a == 0 && b < 0))
 842                 {
 843                     // not a top-left edge, fixup c
 844                     // effectively changes the '>=' to '>' in Edge_equation::inside
 845                     c--;
 846                 }
 847
 848                 edge_equations[start_vertex_index] = Edge_equation(a, b, c);
 849                 if(++end_vertex_index >= triangle_vertex_count)
 850                     end_vertex_index = 0;
 851                 if(++other_vertex_index >= triangle_vertex_count)
 852                     other_vertex_index = 0;
 853             }
 854             if(skip_triangle)
 855                 continue;
 856             auto fs = this->fragment_shader_function;
 857             for(std::int32_t y = min_y; y < end_y; y++)
 858             {
 859                 for(std::int32_t x = min_x; x < end_x; x++)
 860                 {
 861                     bool inside = true;
 862                     for(auto &edge_equation : edge_equations)
 863                     {
 864                         inside &= edge_equation.inside(x, y);
 865                     }
 866                     if(inside)
 867                     {
 868                         auto *pixel = reinterpret_cast<Pixel_type *>(
 869                             color_attachment_memory
 870                             + (static_cast<std::size_t>(x) * color_attachment_pixel_size
 871                                + static_cast<std::size_t>(y) * color_attachment_stride));
 872                         fs(pixel);
 873                     }
 874                 }
 875             }
 876         };
 877     }
 878 }
 879
 880 std::unique_ptr<Graphics_pipeline> Graphics_pipeline::make(
 881     Pipeline_cache *pipeline_cache, const VkGraphicsPipelineCreateInfo &create_info)
 882 {
 883     assert(create_info.sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
 884     auto *render_pass = Render_pass_handle::from_handle(create_info.renderPass);
 885     assert(render_pass);
 886     auto *pipeline_layout = Pipeline_layout_handle::from_handle(create_info.layout);
 887     assert(pipeline_layout);
 888     if(create_info.flags & VK_PIPELINE_CREATE_DERIVATIVE_BIT)
 889     {
 890 #warning implement creating derived pipelines
 891         throw std::runtime_error("creating derived pipelines is not implemented");
 892     }
 893     auto implementation = std::make_shared<Implementation>();
 894     auto optimization_level = ::LLVMCodeGenLevelDefault;
 895     if(create_info.flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
 896         optimization_level = ::LLVMCodeGenLevelNone;
 897     auto llvm_target_machine =
 898         llvm_wrapper::Target_machine::create_native_target_machine(optimization_level);
 899     implementation->compiled_shaders.reserve(create_info.stageCount);
 900     util::Enum_set<spirv::Execution_model> found_shader_stages;
 901     for(std::size_t i = 0; i < create_info.stageCount; i++)
 902     {
 903         auto &stage_info = create_info.pStages[i];
 904         auto execution_models =
 905             vulkan::get_execution_models_from_shader_stage_flags(stage_info.stage);
 906         assert(execution_models.size() == 1);
 907         auto execution_model = *execution_models.begin();
 908         bool added_to_found_shader_stages =
 909             std::get<1>(found_shader_stages.insert(execution_model));
 910         if(!added_to_found_shader_stages)
 911             throw std::runtime_error("duplicate shader stage");
 912         auto *shader_module = Shader_module_handle::from_handle(stage_info.module);
 913         assert(shader_module);
 914         {
 915             spirv::Dump_callbacks dump_callbacks;
 916             try
 917             {
 918                 spirv::parse(dump_callbacks, shader_module->words(), shader_module->word_count());
 919             }
 920             catch(spirv::Parser_error &e)
 921             {
 922                 std::cerr << dump_callbacks.ss.str() << std::endl;
 923                 throw;
 924             }
 925             std::cerr << dump_callbacks.ss.str() << std::endl;
 926         }
 927         assert(create_info.pVertexInputState);
 928         assert(create_info.pVertexInputState->sType == VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO);
 929         auto compiled_shader = spirv_to_llvm::spirv_to_llvm(implementation->llvm_context.get(),
 930                                                             llvm_target_machine.get(),
 931                                                             shader_module->words(),
 932                                                             shader_module->word_count(),
 933                                                             implementation->compiled_shaders.size(),
 934                                                             execution_model,
 935                                                             stage_info.pName,
 936                                                             create_info.pVertexInputState);
 937         std::cerr << "Translation to LLVM succeeded." << std::endl;
 938         ::LLVMDumpModule(compiled_shader.module.get());
 939         bool failed =
 940             ::LLVMVerifyModule(compiled_shader.module.get(), ::LLVMPrintMessageAction, nullptr);
 941         if(failed)
 942             throw std::runtime_error("LLVM module verification failed");
 943         implementation->compiled_shaders.push_back(std::move(compiled_shader));
 944     }
 945     implementation->data_layout = llvm_target_machine.create_target_data_layout();
 946     implementation->jit_stack =
 947         llvm_wrapper::Orc_compile_stack::create(std::move(llvm_target_machine), optimize_module);
 948     Vertex_shader_function vertex_shader_function = nullptr;
 949     std::size_t vertex_shader_output_struct_size = 0;
 950     util::optional<std::size_t> vertex_shader_position_output_offset;
 951     Fragment_shader_function fragment_shader_function = nullptr;
 952     for(auto &compiled_shader : implementation->compiled_shaders)
 953     {
 954         implementation->jit_stack.add_eagerly_compiled_ir(
 955             std::move(compiled_shader.module),
 956             &spirv_to_llvm::Jit_symbol_resolver::resolve,
 957             static_cast<void *>(&implementation->jit_symbol_resolver));
 958         auto shader_entry_point_address = implementation->jit_stack.get_symbol_address(
 959             compiled_shader.entry_function_name.c_str());
 960         std::cerr << "shader entry: " << compiled_shader.entry_function_name << ": "
 961                   << reinterpret_cast<void *>(shader_entry_point_address) << std::endl;
 962         assert(shader_entry_point_address);
 963         switch(compiled_shader.execution_model)
 964         {
 965         case spirv::Execution_model::fragment:
 966             fragment_shader_function =
 967                 reinterpret_cast<Fragment_shader_function>(shader_entry_point_address);
 968 #warning finish implementing Graphics_pipeline::make
 969             continue;
 970 #warning finish implementing Graphics_pipeline::make
 971             throw std::runtime_error("creating fragment shaders is not implemented");
 972         case spirv::Execution_model::geometry:
 973 #warning finish implementing Graphics_pipeline::make
 974             throw std::runtime_error("creating geometry shaders is not implemented");
 975         case spirv::Execution_model::gl_compute:
 976         case spirv::Execution_model::kernel:
 977             throw std::runtime_error("can't create compute shaders from Graphics_pipeline::make");
 978         case spirv::Execution_model::tessellation_control:
 979         case spirv::Execution_model::tessellation_evaluation:
 980 #warning finish implementing Graphics_pipeline::make
 981             throw std::runtime_error("creating tessellation shaders is not implemented");
 982         case spirv::Execution_model::vertex:
 983         {
 984             vertex_shader_function =
 985                 reinterpret_cast<Vertex_shader_function>(shader_entry_point_address);
 986             implementation->vertex_shader_output_struct = compiled_shader.outputs_struct;
 987             auto llvm_vertex_shader_output_struct =
 988                 implementation->vertex_shader_output_struct->get_or_make_type().type;
 989             vertex_shader_output_struct_size = ::LLVMABISizeOfType(
 990                 implementation->data_layout.get(), llvm_vertex_shader_output_struct);
 991             for(auto &member : implementation->vertex_shader_output_struct->get_members(true))
 992             {
 993                 for(auto &decoration : member.decorations)
 994                 {
 995                     if(decoration.value == spirv::Decoration::built_in)
 996                     {
 997                         auto &builtin =
 998                             util::get<spirv::Decoration_built_in_parameters>(decoration.parameters);
 999                         if(builtin.built_in == spirv::Built_in::position)
1000                         {
1001                             vertex_shader_position_output_offset =
1002                                 ::LLVMOffsetOfElement(implementation->data_layout.get(),
1003                                                       llvm_vertex_shader_output_struct,
1004                                                       member.llvm_member_index);
1005                             break;
1006                         }
1007                     }
1008                 }
1009                 if(vertex_shader_position_output_offset)
1010                     break;
1011                 if(auto *struct_type =
1012                        dynamic_cast<spirv_to_llvm::Struct_type_descriptor *>(member.type.get()))
1013                 {
1014                     std::size_t struct_offset =
1015                         ::LLVMOffsetOfElement(implementation->data_layout.get(),
1016                                               llvm_vertex_shader_output_struct,
1017                                               member.llvm_member_index);
1018                     auto llvm_struct_type = struct_type->get_or_make_type().type;
1019                     for(auto &submember : struct_type->get_members(true))
1020                     {
1021                         for(auto &decoration : submember.decorations)
1022                         {
1023                             if(decoration.value == spirv::Decoration::built_in)
1024                             {
1025                                 auto &builtin = util::get<spirv::Decoration_built_in_parameters>(
1026                                     decoration.parameters);
1027                                 if(builtin.built_in == spirv::Built_in::position)
1028                                 {
1029                                     vertex_shader_position_output_offset =
1030                                         struct_offset
1031                                         + ::LLVMOffsetOfElement(implementation->data_layout.get(),
1032                                                                 llvm_struct_type,
1033                                                                 submember.llvm_member_index);
1034                                     break;
1035                                 }
1036                             }
1037                         }
1038                         if(vertex_shader_position_output_offset)
1039                             break;
1040                     }
1041                 }
1042                 if(vertex_shader_position_output_offset)
1043                     break;
1044             }
1045             if(!vertex_shader_position_output_offset)
1046                 throw std::runtime_error("can't find vertex shader Position output");
1047 #warning finish implementing Graphics_pipeline::make
1048             continue;
1049         }
1050         }
1051         throw std::runtime_error("unknown shader kind");
1052     }
1053 #warning finish implementing Graphics_pipeline::make
1054     if(!vertex_shader_function)
1055         throw std::runtime_error("graphics pipeline doesn't have vertex shader");
1056     if(!create_info.pViewportState)
1057         throw std::runtime_error("missing viewport state");
1058     if(create_info.pViewportState->viewportCount != 1)
1059         throw std::runtime_error("unimplemented viewport count");
1060     if(!create_info.pViewportState->pViewports)
1061         throw std::runtime_error("missing viewport list");
1062     if(!create_info.pViewportState->pScissors)
1063         throw std::runtime_error("missing scissor rectangle list");
1064     assert(vertex_shader_position_output_offset);
1065     return std::unique_ptr<Graphics_pipeline>(
1066         new Graphics_pipeline(std::move(implementation),
1067                               vertex_shader_function,
1068                               vertex_shader_output_struct_size,
1069                               *vertex_shader_position_output_offset,
1070                               fragment_shader_function,
1071                               create_info.pViewportState->pViewports[0],
1072                               create_info.pViewportState->pScissors[0]));
1073 }
1074 }
1075 }