/**
 * Partition the given file list amongst $process_count processes such
 * that each class hierarchy stays together on a single process, ordered
 * root-first, so every process gets a stable, self-contained workload.
 *
 * @param int $process_count
 * The number of processes we'd like to divide work up
 * amongst.
 *
 * @param string[] $analysis_file_list
 * A list of files that should be analyzed which will be
 * used to ignore any files outside of the list and to
 * draw from for any missing files.
 *
 * @return string[][]
 * A map from process_id to a list of files to be analyzed
 * on that process in stable ordering.
 */
public function orderForProcessCount(int $process_count, array $analysis_file_list) : array
{
    assert($process_count > 0, "The process count must be greater than zero.");

    // Optional fast path: shuffle the file list and deal the files
    // round-robin across the processes.
    if (Config::get()->randomize_file_order) {
        $random_proc_file_map = [];
        shuffle($analysis_file_list);
        // shuffle() reindexes the array to sequential integer keys
        // (0..n-1), so the foreach key itself drives the round-robin
        // assignment. (A separate counter previously declared here was
        // shadowed by the foreach key — dead code, now removed.)
        foreach ($analysis_file_list as $i => $file) {
            $random_proc_file_map[$i % $process_count][] = $file;
        }
        return $random_proc_file_map;
    }

    // Construct a Hasher implementation based on config. The consistent
    // hasher needs a deterministic input order to group reproducibly.
    if (Config::get()->consistent_hashing_file_order) {
        sort($analysis_file_list, SORT_STRING);
        $hasher = new Consistent($process_count);
    } else {
        $hasher = new Sequential($process_count);
    }

    // Create a Set from the file list so membership checks and
    // removals below are O(1).
    $analysis_file_map = [];
    foreach ($analysis_file_list as $file) {
        $analysis_file_map[$file] = true;
    }

    // Map each eligible file to one class defined in it so files can
    // be grouped by class hierarchy below.
    $file_names_for_classes = [];
    foreach ($this->code_base->getClassMap() as $fqsen => $class) {
        // We won't be analyzing internal stuff
        if ($class->isInternal()) {
            continue;
        }

        // Get the name of the file associated with the class
        $file_name = $class->getContext()->getFile();

        // Ignore any files that are not to be analyzed
        if (!isset($analysis_file_map[$file_name])) {
            continue;
        }

        // Remove the file from the set; whatever remains afterwards
        // contains no (non-internal) classes and is distributed
        // separately at the end.
        unset($analysis_file_map[$file_name]);
        $file_names_for_classes[$file_name] = $class;
    }

    if (Config::get()->consistent_hashing_file_order) {
        // Iterate in a deterministic order so consistent hashing
        // yields the same grouping across runs.
        ksort($file_names_for_classes, SORT_STRING);
    }

    // A map from the root of an object hierarchy to the {file, depth}
    // pairs of all elements within that hierarchy.
    $root_fqsen_list = [];
    foreach ($file_names_for_classes as $file_name => $class) {
        // Get the class's depth in its object hierarchy and
        // the FQSEN of the object at the root of its hierarchy
        $hierarchy_depth = $class->getHierarchyDepth($this->code_base);
        $hierarchy_root = (string)$class->getHierarchyRootFQSEN($this->code_base);

        // Append this {file, depth} pair to the hierarchy root's
        // bucket. (PHP auto-vivifies the bucket on first append, so
        // no explicit initialization is needed.)
        $root_fqsen_list[$hierarchy_root][] = [
            'file' => $file_name,
            'depth' => $hierarchy_depth,
        ];
    }

    // Create a map from processor_id to the list of files
    // to be analyzed on that processor
    $processor_file_list_map = [];
    foreach ($root_fqsen_list as $root_fqsen => $list) {
        // Sort first by depth so parents precede children, breaking
        // ties by file name lexicographically (usort is not a stable
        // sort before PHP 8.0).
        usort($list, function (array $a, array $b) {
            return $a['depth'] <=> $b['depth'] ?: strcmp($a['file'], $b['file']);
        });

        // Choose which process this file list will run on; hashing by
        // the hierarchy root keeps a whole hierarchy on one process.
        // (Cast guards against PHP's numeric-string key coercion.)
        $process_id = $hasher->getGroup((string) $root_fqsen);

        // Append each file to this process list
        foreach ($list as $item) {
            $processor_file_list_map[$process_id][] = $item['file'];
        }
    }

    // Distribute any remaining files without classes evenly
    // between the processes
    $hasher->reset();
    foreach (array_keys($analysis_file_map) as $file) {
        $process_id = $hasher->getGroup((string) $file);
        $processor_file_list_map[$process_id][] = $file;
    }

    return $processor_file_list_map;
}