PersonStringNlm30NameSchemaFilter::_parsePersonString PHP Method

_parsePersonString() public method

TODO: add initials from all given names to initials element
public _parsePersonString ( $personString, $title, $degrees ) : MetadataDescription
$personString string
$title boolean true to parse for title
$degrees boolean true to parse for degrees
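return MetadataDescription an NLM name description; the implementation shown below always returns a description object (it may carry only a surname, or no statements at all, when the string cannot be parsed further) and never returns null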
    /**
     * Converts a string with a single person name into an NLM name description.
     *
     * The string is first stripped of an optional leading title and optional
     * trailing degrees (both end up in the 'suffix' statement). The remainder
     * is then matched against a list of name expressions ordered from most to
     * least specific; the first expression that matches determines the
     * 'given-names', 'surname', 'prefix' and 'suffix' statements.
     *
     * TODO: add initials from all given names to initials element
     *
     * @param $personString string the person string to parse
     * @param $title boolean true to parse for title
     * @param $degrees boolean true to parse for degrees
     * @return MetadataDescription the name description; always an object,
     *  which may contain no statements if nothing could be extracted
     */
    function &_parsePersonString($personString, $title, $degrees)
    {
        // Expressions to parse person strings, ported from CiteULike person
        // plugin, see http://svn.citeulike.org/svn/plugins/person.tcl
        //
        // NB: the prefix expression uses plain character classes ([ae], [aiu],
        // [aeo], [DLO]). A "|" inside a character class is a literal pipe, not
        // an alternation, so writing "[a|e]" would also accept "|".
        static $personRegex = array(
            'title' => '(?:His (?:Excellency|Honou?r)\\s+|Her (?:Excellency|Honou?r)\\s+|The Right Honou?rable\\s+|The Honou?rable\\s+|Right Honou?rable\\s+|The Rt\\.? Hon\\.?\\s+|The Hon\\.?\\s+|Rt\\.? Hon\\.?\\s+|Mr\\.?\\s+|Ms\\.?\\s+|M\\/s\\.?\\s+|Mrs\\.?\\s+|Miss\\.?\\s+|Dr\\.?\\s+|Sir\\s+|Dame\\s+|Prof\\.?\\s+|Professor\\s+|Doctor\\s+|Mister\\s+|Mme\\.?\\s+|Mast(?:\\.|er)?\\s+|Lord\\s+|Lady\\s+|Madam(?:e)?\\s+|Priv\\.-Doz\\.\\s+)+',
            'degrees' => '(,\\s+(?:[A-Z\\.]+))+',
            'initials' => '(?:(?:[A-Z]\\.){1,3}[A-Z]\\.?)|(?:(?:[A-Z]\\.\\s){1,3}[A-Z]\\.?)|(?:[A-Z]{1,4})|(?:(?:[A-Z]\\.-?){1,4})|(?:(?:[A-Z]\\.-?){1,3}[A-Z]\\.?)|(?:(?:[A-Z]-){1,3}[A-Z])|(?:(?:[A-Z]\\s){1,3}[A-Z]\\.?)|(?:(?:[A-Z]-){1,3}[A-Z]\\.?)',
            'prefix' => 'Dell(?:[ae])?(?:\\s|$)|Dalle(?:\\s|$)|D[ae]ll\'(?:\\s|$)|Dela(?:\\s|$)|Del(?:\\s|$)|[Dd]e(?:\\s|$)(?:La(?:\\s|$)|Los(?:\\s|$))?|[Dd]e(?:\\s|$)|[Dd][aiu](?:\\s|$)|L[aeo](?:\\s|$)|[DLO]\'|St\\.?(?:\\s|$)|San(?:\\s|$)|[Dd]en(?:\\s|$)|[Vv]on(?:\\s|$)(?:[Dd]er(?:\\s|$))?|(?:[Ll][ea](?:\\s|$))?[Vv]an(?:\\s|$)(?:[Dd]e(?:n|r)?(?:\\s|$))?',
            'givenName' => '(?:[^ \\t\\n\\r\\f\\v,.;()]{2,}|[^ \\t\\n\\r\\f\\v,.;()]{2,}\\-[^ \\t\\n\\r\\f\\v,.;()]{2,})'
        );
        // The expressions for given name, suffix and surname are the same
        $personRegex['surname'] = $personRegex['suffix'] = $personRegex['givenName'];
        $personRegex['double-surname'] = "(?:" . $personRegex['surname'] . "\\s)*" . $personRegex['surname'];

        // Shortcut for prefixed surname
        $personRegexPrefixedSurname = "(?P<prefix>(?:" . $personRegex['prefix'] . ")?)(?P<surname>" . $personRegex['surname'] . ")";
        $personRegexPrefixedDoubleSurname = "(?P<prefix>(?:" . $personRegex['prefix'] . ")?)(?P<surname>" . $personRegex['double-surname'] . ")";

        // Instantiate the target person description
        $personDescription = new MetadataDescription('lib.pkp.plugins.metadata.nlm30.schema.Nlm30NameSchema', $this->_assocType);

        // Clean the person string
        $personString = trim($personString);

        // 1. Extract title and degree from the person string and use this as suffix
        $suffixString = '';
        $results = array();
        $titleExpression = '/^(' . $personRegex['title'] . ')/i';
        if ($title && PKPString::regexp_match_get($titleExpression, $personString, $results)) {
            $suffixString = trim($results[1], ',:; ');
            $personString = PKPString::regexp_replace($titleExpression, '', $personString);
        }

        $degreesExpression = '/(' . $personRegex['degrees'] . ')$/i';
        if ($degrees && PKPString::regexp_match_get($degreesExpression, $personString, $results)) {
            $degreesArray = explode(',', trim($results[1], ','));
            foreach ($degreesArray as $key => $degree) {
                $degreesArray[$key] = PKPString::trimPunctuation($degree);
            }
            $degreesString = implode('; ', $degreesArray);
            // Only join with " - " when a title precedes the degrees; otherwise
            // the suffix would start with a dangling separator.
            $suffixString = ($suffixString === '' ? $degreesString : $suffixString . ' - ' . $degreesString);
            $personString = PKPString::regexp_replace($degreesExpression, '', $personString);
        }

        if (!empty($suffixString)) {
            $personDescription->addStatement('suffix', $suffixString);
        }

        // Space initials when followed by a given name or last name.
        $personString = PKPString::regexp_replace('/([A-Z])\\.([A-Z][a-z])/', '\\1. \\2', $personString);

        // 2. Extract names and initials from the person string
        // The parser expressions are ordered by specificity. The most specific expressions
        // come first. Only if these specific expressions don't work will we turn to less
        // specific ones. This avoids parsing errors. It also explains why we don't use the
        // ?-quantifier for optional elements like initials or middle name where they could
        // be misinterpreted.
        $personExpressions = array(
            // Surname only (with optional prefix)
            '/^' . $personRegexPrefixedSurname . '$/i',
            // Initials, then surname
            '/^(?P<initials>' . $personRegex['initials'] . ')\\s' . $personRegexPrefixedSurname . '$/',
            // Surname, then initials
            '/^' . $personRegexPrefixedSurname . ',?\\s(?P<initials>' . $personRegex['initials'] . ')$/',
            // (Double) surname, one given name, initials
            '/^' . $personRegexPrefixedDoubleSurname . ',\\s(?P<givenName>' . $personRegex['givenName'] . ')\\s(?P<initials>' . $personRegex['initials'] . ')$/',
            // One given name, initials, surname
            '/^(?P<givenName>' . $personRegex['givenName'] . ')\\s(?P<initials>' . $personRegex['initials'] . ')\\s' . $personRegexPrefixedSurname . '$/',
            // (Double) surname, several given names, initials
            '/^' . $personRegexPrefixedDoubleSurname . ',\\s(?P<givenName>(?:' . $personRegex['givenName'] . '\\s)+)(?P<initials>' . $personRegex['initials'] . ')$/',
            // Several given names, initials, surname
            '/^(?P<givenName>(?:' . $personRegex['givenName'] . '\\s)+)(?P<initials>' . $personRegex['initials'] . ')\\s' . $personRegexPrefixedSurname . '$/',
            // (Double) surname, several given names
            '/^' . $personRegexPrefixedDoubleSurname . ',(?P<givenName>(?:\\s' . $personRegex['givenName'] . ')+)$/',
            // Several given names, surname
            '/^(?P<givenName>(?:' . $personRegex['givenName'] . '\\s)+)' . $personRegexPrefixedSurname . '$/',
            // Surname [suffix], [initials] (given names) [prefix]
            '/^\\s*(?P<surname>' . $personRegex['surname'] . ')(?P<suffix>(?:\\s+' . $personRegex['suffix'] . ')?)\\s*,\\s*(?P<initials>(?:' . $personRegex['initials'] . ')?)\\s*\\((?P<givenName>(?:\\s*' . $personRegex['givenName'] . ')+)\\s*\\)\\s*(?P<prefix>(?:' . $personRegex['prefix'] . ')?)$/',
            // Givenname.Surname
            '/^(?P<givenName>' . $personRegex['givenName'] . ')\\.(?P<surname>' . $personRegex['double-surname'] . ')$/',
            // Catch-all: treat the whole string as surname
            '/^(?P<surname>.*)$/'
        );

        $results = array();
        foreach ($personExpressions as $personExpression) {
            if (!PKPString::regexp_match_get($personExpression, $personString, $results)) {
                continue;
            }

            // Given names
            if (!empty($results['givenName'])) {
                // Split on any run of whitespace: the expressions above separate
                // given names with \s / \s*, so a plain explode(' ') would miss
                // tabs and would yield empty names for repeated blanks.
                $givenNames = preg_split('/[ \\t\\n\\r\\f]+/', trim($results['givenName']), -1, PREG_SPLIT_NO_EMPTY);
                foreach ($givenNames as $givenName) {
                    $personDescription->addStatement('given-names', $givenName);
                }
            }

            // Initials (will also be saved as given names)
            if (!empty($results['initials'])) {
                // The initials expression only admits A-Z, '.', '-' and
                // whitespace, so keeping A-Z drops every separator at once.
                $initials = (string) preg_replace('/[^A-Z]/', '', $results['initials']);
                $initialCount = strlen($initials);
                for ($initialNum = 0; $initialNum < $initialCount; $initialNum++) {
                    $personDescription->addStatement('given-names', $initials[$initialNum]);
                }
            }

            // Surname
            if (!empty($results['surname'])) {
                $surname = $results['surname'];
                // Correct all-upper surname
                // NOTE(review): strtoupper/strtolower/ucwords work on bytes, so
                // surnames with non-ASCII letters may not be case-corrected as
                // expected - confirm whether a multibyte-aware helper should be
                // used here.
                if (strtoupper($surname) === $surname) {
                    $surname = ucwords(strtolower($surname));
                }
                $personDescription->addStatement('surname', $surname);
            }

            // Prefix/Suffix
            foreach (array('prefix', 'suffix') as $propertyName) {
                if (!empty($results[$propertyName])) {
                    $personDescription->addStatement($propertyName, trim($results[$propertyName]));
                }
            }

            // The first (most specific) matching expression wins.
            break;
        }

        return $personDescription;
    }