/**
 * Assigns a part-of-speech tag to every word of a tokenised text.
 *
 * Each word is looked up in the lexicon returned by lexicon.php (words that
 * are not found default to 'NN'); that first guess is then refined by a
 * fixed sequence of suffix and context rules. The rules run in order and a
 * later rule sees the tag produced by the earlier ones, so the order of the
 * checks below must not be changed.
 *
 * NOTE(review): the tag names (NN, VBN, RB, ...) look like Penn Treebank
 * tags, and the lexicon is assumed to map a word to a list of tag strings
 * with the preferred tag at index 0 - confirm against lexicon.php.
 *
 * @param array $words Tokens to tag. Keys are preserved in the result.
 *
 * @return array One ['token' => ..., 'tag' => ...] entry per input word,
 *               stored under the word's original key. 'token' is the word
 *               exactly as it was passed in (trailing full stops included).
 */
public static function tag(array $words)
{
    // The lexicon is loaded on first use and kept for subsequent calls.
    static $dict;
    if ($dict === null) {
        $dict = require __DIR__ . '/lexicon.php';
    }

    $nouns = ['NN', 'NNS'];
    $verbs = ['VBD', 'VBP', 'VB'];

    $return = [];
    // Entry produced for the preceding word; null while on the first word.
    $previous = null;

    foreach ($words as $id => $token) {
        // Rules and lexicon lookups work on the word without trailing full
        // stops; the untouched token is what gets reported back.
        $word = rtrim($token, '.');
        $known = isset($dict[$word]) ? $dict[$word] : null;

        // Default to common noun, or take the first tag listed in the lexicon.
        $tag = 'NN';
        if (!empty($known)) {
            $tag = $known[0];
        }

        // Converts verbs after a determiner to nouns
        if ($previous !== null && $previous['tag'] === 'DT' && in_array($tag, $verbs, true)) {
            $tag = 'NN';
        }

        // Convert noun to number if a '.' still appears inside the word
        if (strncmp($tag, 'N', 1) === 0 && strpos($word, '.') !== false) {
            $tag = 'CD';
        }

        // Convert noun to past participle if it ends with 'ed'
        if (strncmp($tag, 'N', 1) === 0 && substr($word, -2) === 'ed') {
            $tag = 'VBN';
        }

        // Anything that ends with 'ly' is an adverb, whatever the tag so far
        if (substr($word, -2) === 'ly') {
            $tag = 'RB';
        }

        // Common noun to adjective if it ends with 'al'
        if (in_array($tag, $nouns, true) && substr($word, -2) === 'al') {
            $tag = 'JJ';
        }

        // Noun to verb if the word before is 'would'
        if ($previous !== null && $tag === 'NN' && $previous['token'] === 'would') {
            $tag = 'VB';
        }

        // Convert noun to plural if it ends with an 's'
        if ($tag === 'NN' && substr($word, -1) === 's') {
            $tag = 'NNS';
        }

        // Convert common noun to gerund
        if (in_array($tag, $nouns, true) && substr($word, -3) === 'ing') {
            $tag = 'VBG';
        }

        // If we get noun noun, and the lexicon says the second can be a verb,
        // convert it to that verb form (VBN is preferred over VBZ)
        if (
            $previous !== null
            && $known !== null
            && in_array($tag, $nouns, true)
            && in_array($previous['tag'], $nouns, true)
        ) {
            if (in_array('VBN', $known, true)) {
                $tag = 'VBN';
            } elseif (in_array('VBZ', $known, true)) {
                $tag = 'VBZ';
            }
        }

        $previous = ['token' => $token, 'tag' => $tag];
        $return[$id] = $previous;
    }

    return $return;
}