1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-25 12:12:50 +01:00

Improved Zen chip detection

This commit is contained in:
kd-11 2020-12-09 21:29:48 +03:00 committed by kd-11
parent 0e278d2299
commit cbe0000d23
3 changed files with 166 additions and 84 deletions

View File

@ -2498,7 +2498,7 @@ u64 thread_ctrl::get_affinity_mask(thread_class group)
if (const auto thread_count = std::thread::hardware_concurrency())
{
const u64 all_cores_mask = thread_count < 64 ? UINT64_MAX >> (64 - thread_count): UINT64_MAX;
const u64 all_cores_mask = get_process_affinity_mask();
switch (g_native_core_layout)
{
@ -2509,105 +2509,140 @@ u64 thread_ctrl::get_affinity_mask(thread_class group)
}
case native_core_arrangement::amd_ccx:
{
if (thread_count <= 8)
{
// Single CCX or not enough threads, do nothing
return all_cores_mask;
}
u64 spu_mask, ppu_mask, rsx_mask;
spu_mask = ppu_mask = rsx_mask = all_cores_mask; // Fallback, in case someone is messing with core config
const auto system_id = utils::get_cpu_brand();
if (thread_count >= 32)
const auto family_id = utils::get_cpu_family();
const auto model_id = utils::get_cpu_model();
switch (family_id)
{
if (system_id.find("3950X") != umax)
case 0x17: // Zen, Zen+, Zen2
case 0x18: // Dhyana core (Zen)
{
// zen2
// Ryzen 9 3950X
// Assign threads 9-32
if (model_id > 0x30)
{
// Zen2 (models 49, 96, 113, 144)
// Much improved inter-CCX latency
switch (thread_count)
{
case 128:
case 64:
case 48:
case 32:
// TR 3000 series, or R9 3950X, Assign threads 9-32
ppu_mask = 0b11111111000000000000000000000000;
spu_mask = 0b00000000111111110000000000000000;
rsx_mask = 0b00000000000000001111111100000000;
}
else if (system_id.find("2970WX") != umax)
{
// zen+
// Threadripper 2970WX
// Assign threads 9-24
ppu_mask = 0b000000111111000000000000;
spu_mask = ppu_mask;
rsx_mask = 0b111111000000000000000000;
}
else
{
// zen(+)
// Threadripper 1950X/2950X/2990WX
// Assign threads 17-32
ppu_mask = 0b00000000111111110000000000000000;
spu_mask = ppu_mask;
rsx_mask = 0b11111111000000000000000000000000;
}
}
else if (thread_count == 24)
{
if (system_id.find("3900X") != umax)
{
// zen2
// Ryzen 9 3900X
// Assign threads 7-22
break;
case 24:
// 3900X, Assign threads 7-22
ppu_mask = 0b111111000000000000000000;
spu_mask = 0b000000111111000000000000;
rsx_mask = 0b000000000000111111000000;
}
else
{
// zen(+)
// Threadripper 1920X/2920X
// Assign threads 13-24
ppu_mask = 0b000000111111000000000000;
spu_mask = ppu_mask;
rsx_mask = 0b111111000000000000000000;
}
}
else if (thread_count == 16)
{
if (system_id.find("3700X") != umax || system_id.find("3800X") != umax)
{
// Ryzen 7 3700/3800 (x)
// Assign threads 1-16
break;
case 16:
// 3700, 3800 family, Assign threads 1-16
ppu_mask = 0b0000000011110000;
spu_mask = 0b1111111100000000;
rsx_mask = 0b0000000000001111;
}
else
{
// zen(+)
// Ryzen 7, Threadripper
// Assign threads 3-16
ppu_mask = 0b1111111100000000;
spu_mask = ppu_mask;
rsx_mask = 0b0000000000111100;
}
}
else if (thread_count == 12)
{
if (system_id.find("3600") != umax)
{
// zen2
// R5 3600 (x)
// Assign threads 1-12
break;
case 12:
// 3600 family, Assign threads 1-12
ppu_mask = 0b000000111000;
spu_mask = 0b111111000000;
rsx_mask = 0b000000000111;
break;
default:
break;
}
}
else
{
// zen(+)
// R5 1600/2600 (x)
// Assign threads 3-12
// Zen, Zen+ (models 1, 8(+), 17, 24(+), 32)
switch (thread_count)
{
case 64:
// TR 2990WX, Assign threads 17-32
ppu_mask = 0b00000000111111110000000000000000;
spu_mask = ppu_mask;
rsx_mask = 0b11111111000000000000000000000000;
break;
case 48:
// TR 2970WX, Assign threads 9-24
ppu_mask = 0b000000111111000000000000;
spu_mask = ppu_mask;
rsx_mask = 0b111111000000000000000000;
break;
case 32:
// TR 2950X, TR 1950X, Assign threads 17-32
ppu_mask = 0b00000000111111110000000000000000;
spu_mask = ppu_mask;
rsx_mask = 0b11111111000000000000000000000000;
break;
case 24:
// TR 1920X, 2920X, Assign threads 13-24
ppu_mask = 0b000000111111000000000000;
spu_mask = ppu_mask;
rsx_mask = 0b111111000000000000000000;
break;
case 16:
// 1700, 1800, 2700, TR 1900X family
ppu_mask = 0b1111111100000000;
spu_mask = ppu_mask;
rsx_mask = 0b0000000000111100;
break;
case 12:
// 1600, 2600 family, Assign threads 3-12
ppu_mask = 0b111111000000;
spu_mask = ppu_mask;
rsx_mask = 0b000000111100;
break;
default:
break;
}
}
break;
}
case 0x19: // Zen3
{
// Single-CCX architecture, just disable SMT if wide enough
// CCX now holds upto 16 threads
// Lack of hw availability makes testing difficult
switch (thread_count)
{
case 24:
// 5900X, Use same scheduler as 3900X
// Unverified on windows, may be worse than just disabling SMT and scheduler
ppu_mask = 0b111111000000000000000000;
spu_mask = 0b000000111111000000000000;
rsx_mask = 0b000000000000111111000000;
break;
default:
if (thread_count >= 16)
{
// Verified by more than one windows user on 16-thread CPU
ppu_mask = spu_mask = rsx_mask = (0b10101010101010101010101010101010 & all_cores_mask);
}
else
{
// R5 & R3 don't seem to improve performance no matter how these are shuffled
ppu_mask = spu_mask = rsx_mask = 0b11111111 & all_cores_mask;
ppu_mask = spu_mask = rsx_mask = all_cores_mask;
}
break;
}
break;
}
default:
{
break;
}
}
switch (group)

View File

@ -417,3 +417,46 @@ u32 utils::get_thread_count()
return ::sysconf(_SC_NPROCESSORS_ONLN);
#endif
}
u32 utils::get_cpu_family()
{
static const u32 g_value = []()
{
const u32 reg_value = get_cpuid(0x00000001, 0)[0]; // Processor feature info
const u32 base_value = (reg_value >> 8) & 0xF;
if (base_value == 0xF) [[likely]]
{
const u32 extended_value = (reg_value >> 20) & 0xFF;
return base_value + extended_value;
}
else
{
return base_value;
}
}();
return g_value;
}
u32 utils::get_cpu_model()
{
static const u32 g_value = []()
{
const u32 reg_value = get_cpuid(0x00000001, 0)[0]; // Processor feature info
const u32 base_value = (reg_value >> 4) & 0xF;
if (const auto base_family_id = (reg_value >> 8) & 0xF;
base_family_id == 0x6 || base_family_id == 0xF) [[likely]]
{
const u32 extended_value = (reg_value >> 16) & 0xF;
return base_value + (extended_value << 4);
}
else
{
return base_value;
}
}();
return g_value;
}

View File

@ -50,4 +50,8 @@ namespace utils
u64 get_total_memory();
u32 get_thread_count();
u32 get_cpu_family();
u32 get_cpu_model();
}