configs: set hsaTopology properties from options
author Kyle Roarty <kyleroarty1716@gmail.com>
Tue, 26 May 2020 17:03:29 +0000 (12:03 -0500)
committer Kyle Roarty <kyleroarty1716@gmail.com>
Sat, 29 Aug 2020 01:49:37 +0000 (01:49 +0000)
This change sets the properties in hsaTopology to the proper values
specified by the user through command-line arguments. This ensures
that if the properties file is read by a program, it will return
the correct values for the simulated hardware.

This change also adds a command-line argument (--lds-size) for the LDS
size in bytes, as it was the only other property used in hsaTopology that
didn't have a command-line argument. The default value (65536) is taken
from src/gpu-compute/LdsState.py

Change-Id: I17bb812491708f4221c39b738c906f1ad944614d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/31995
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Alexandru Duțu <alexandru.dutu@amd.com>
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
configs/example/apu_se.py
configs/example/hsaTopology.py

index 59dd4c5f757bf5bec111179720cb9e26b5b09c30..03418c32833bcce253cda06194879e6b206ae0e4 100644 (file)
@@ -174,6 +174,8 @@ parser.add_option("--numLdsBanks", type="int", default=32,
                   help="number of physical banks per LDS module")
 parser.add_option("--ldsBankConflictPenalty", type="int", default=1,
                   help="number of cycles per LDS bank conflict")
+parser.add_option("--lds-size", type="int", default=65536,
+                  help="Size of the LDS in bytes")
 parser.add_option('--fast-forward-pseudo-op', action='store_true',
                   help = 'fast forward using kvm until the m5_switchcpu'
                   ' pseudo-op is encountered, then switch cpus. subsequent'
@@ -290,7 +292,8 @@ for i in range(n_cu):
                                      localDataStore = \
                                      LdsState(banks = options.numLdsBanks,
                                               bankConflictPenalty = \
-                                              options.ldsBankConflictPenalty)))
+                                              options.ldsBankConflictPenalty,
+                                              size = options.lds_size)))
     wavefronts = []
     vrfs = []
     vrf_pool_mgrs = []
index df242234540358db4fef9a5d886929cc946c60a7..707a83df3dc65ec5138cea0d4c2a90e622254855 100644 (file)
@@ -36,6 +36,7 @@ from os import mkdir, makedirs, getpid, listdir, fsync
 from os.path import join as joinpath
 from os.path import isdir
 from shutil import rmtree, copyfile
+from m5.util.convert import toFrequency
 
 def file_append(path, contents):
     with open(joinpath(*path), 'a') as f:
@@ -76,30 +77,32 @@ def createHsaTopology(options):
 
     # populate global node properties
     # NOTE: SIMD count triggers a valid GPU agent creation
-    # TODO: Really need to parse these from options
-    node_prop = 'cpu_cores_count %s\n' % options.num_cpus   + \
-                'simd_count 32\n'                           + \
-                'mem_banks_count 0\n'                       + \
-                'caches_count 0\n'                          + \
-                'io_links_count 0\n'                        + \
-                'cpu_core_id_base 16\n'                     + \
-                'simd_id_base 2147483648\n'                 + \
-                'max_waves_per_simd 40\n'                   + \
-                'lds_size_in_kb 64\n'                       + \
-                'gds_size_in_kb 0\n'                        + \
-                'wave_front_size 64\n'                      + \
-                'array_count 1\n'                           + \
-                'simd_arrays_per_engine 1\n'                + \
-                'cu_per_simd_array 10\n'                    + \
-                'simd_per_cu 4\n'                           + \
-                'max_slots_scratch_cu 32\n'                 + \
-                'vendor_id 4098\n'                          + \
-                'device_id 39028\n'                         + \
-                'location_id 8\n'                           + \
-                'max_engine_clk_fcompute 800\n'             + \
-                'local_mem_size 0\n'                        + \
-                'fw_version 699\n'                          + \
-                'capability 4738\n'                         + \
-                'max_engine_clk_ccompute 2100\n'
+    node_prop = 'cpu_cores_count %s\n' % options.num_cpus                   + \
+                'simd_count %s\n'                                             \
+                    % (options.num_compute_units * options.simds_per_cu)    + \
+                'mem_banks_count 0\n'                                       + \
+                'caches_count 0\n'                                          + \
+                'io_links_count 0\n'                                        + \
+                'cpu_core_id_base 16\n'                                     + \
+                'simd_id_base 2147483648\n'                                 + \
+                'max_waves_per_simd %s\n' % options.wfs_per_simd            + \
+                'lds_size_in_kb %s\n' % int(options.lds_size / 1024)        + \
+                'gds_size_in_kb 0\n'                                        + \
+                'wave_front_size %s\n' % options.wf_size                    + \
+                'array_count 1\n'                                           + \
+                'simd_arrays_per_engine %s\n' % options.sa_per_complex      + \
+                'cu_per_simd_array %s\n' % options.cu_per_sa                + \
+                'simd_per_cu %s\n' % options.simds_per_cu                   + \
+                'max_slots_scratch_cu 32\n'                                 + \
+                'vendor_id 4098\n'                                          + \
+                'device_id 39028\n'                                         + \
+                'location_id 8\n'                                           + \
+                'max_engine_clk_fcompute %s\n'                                \
+                    % int(toFrequency(options.gpu_clock) / 1e6)             + \
+                'local_mem_size 0\n'                                        + \
+                'fw_version 699\n'                                          + \
+                'capability 4738\n'                                         + \
+                'max_engine_clk_ccompute %s\n'                                \
+                    % int(toFrequency(options.CPUClock) / 1e6)
 
     file_append((node_dir, 'properties'), node_prop)