MathPHP\Statistics\ANOVA::twoWay PHP Метод

twoWay() публичный статический Метод

https://en.wikipedia.org/wiki/Two-way_analysis_of_variance Produces the following analysis of the data: ANOVA hypothesis test summary data | SS | df | MS | F | P | Factor A | | | | | | Factor B | | | | | | Interaction | | | | | | Error | | | | Total | | | where: Interaction = Factor A X Factor B working together Error is within groups SS = Sum of squares df = Degrees of freedom MS = Mean squares F = F statistic P = P value Data summary tables for: Factor A Factor B Factor AB (Interaction) Total | N | Sum | Mean | SS | Variance | SD | SEM | 0 | | | | | | | | 1 | | | | | | | | ... | | | | | | | | Total | | | | | | | | where: Each row is the summary for a sample, numbered from 0 to m - 1 m = Number of samples N = Sample size SS = Sum of squares SD = Standard deviation SEM = Standard error of the mean Calculations Sum of Squares SST (sum of squares total) ∑⟮xᵢ − μ⟯² where: xᵢ = each element of all samples μ = mean total of all elements of all samples SSA, SSB (sum of squares for each factor A and B) ∑n(x - μ)² where: n = sample size x = sample mean μ = mean total of all elements of all samples SSW (sum of squares within - error) ∑∑⟮x − μ⟯² Sum of sum of squared deviations of each sample where: x = each element of an AB sample μ = mean of that AB sample SSAB (sum of squares AB - interaction) SSAB = SST - SSA - SSB - SSW; Degrees of Freedom dfT (degrees of freedom for the total) n - 1 dfA (degrees of freedom factor A) r - 1 dfB (degrees of freedom factor B) c - 1 dfAB (degrees of freedom factor AB - interaction) (r - 1)(c - 1) dfW (degrees of freedom within - error) n - rc where: n = total number of elements across all samples r = number of rows (number of factor As) c = number of columns (number of factor Bs) Mean Squares MSA (Mean squares factor A) SSA / dfA MSB (Mean squares factor B) SSB / dfB MSAB (Mean squares factor AB - interaction) SSAB / dfAB MSW (Mean squares within - error) SSW / dfW F Test Statistics FA = MSA / MSW FB = MSB / MSW FAB = MSAB / MSW P values PA = F distribution CDF above FA with degrees 
of freedom dfA and dfW PB = F distribution CDF above FB with degrees of freedom dfB and dfW PAB = F distribution CDF above FAB with degrees of freedom dfAB and dfW Example input data for ...$data parameter: | Factor B₁ | Factor B₂ | ⋯ Factor A₁ | 4, 6, 8 | 6, 6, 9 | ⋯ Factor A₂ | 4, 8, 9 | 7, 10, 13 | ⋯ ⋮ ⋮ ⋮ ⋮
public static twoWay ( variadic $data ) : array
$data variadic Samples to analyze [ // Factor A₁ [ [4, 6, 8] // Factor B₁ [6, 6, 9] // Factor B₂ ⋮ ], // Factor A₂ [ [4, 8, 9] // Factor B₁ [7, 10, 13] // Factor B₂ ⋮ ], ... ]
Результат array [ ANOVA => [ factorA => [SS, df, MS, F, P], factorB => [SS, df, MS, F, P], interaction => [SS, df, MS, F, P], error => [SS, df, MS], total => [SS, df], ], total_summary => [n, sum, mean, SS, variance, sd, sem], summary_factorA => [ 0 => [n, sum, mean, SS, variance, sd, sem], 1 => [n, sum, mean, SS, variance, sd, sem], ... ], summary_factorB => [ 0 => [n, sum, mean, SS, variance, sd, sem], 1 => [n, sum, mean, SS, variance, sd, sem], ... ], summary_interaction => [ 0 => [ 0 => [n, sum, mean, SS, variance, sd, sem], 1 => [n, sum, mean, SS, variance, sd, sem], ... ], 1 => [ 0 => [n, sum, mean, SS, variance, sd, sem], 1 => [n, sum, mean, SS, variance, sd, sem], ... ], ... ] ]
    /**
     * Two-way ANOVA with replication.
     *
     * Input is a grid of samples: one array per level of factor A (rows), each
     * holding one array per level of factor B (columns), each of which holds
     * the observed values for that A/B combination.
     *
     * The grid must be rectangular: at least two rows, the same number of
     * columns in every row, and the same number of values in every cell.
     *
     * @param array ...$data One array per factor A level:
     *                       [[B₁ values], [B₂ values], ...]
     *
     * @return array [
     *     'ANOVA' => [
     *         'factorA'     => [SS, df, MS, F, P],
     *         'factorB'     => [SS, df, MS, F, P],
     *         'interaction' => [SS, df, MS, F, P],
     *         'error'       => [SS, df, MS],
     *         'total'       => [SS, df],
     *     ],
     *     'total_summary'       => [n, sum, mean, SS, variance, sd, sem],
     *     'summary_factorA'     => one summary per factor A level,
     *     'summary_factorB'     => one summary per factor B level,
     *     'summary_interaction' => one summary per [A][B] cell,
     * ]
     *
     * @throws Exception\BadDataException if there are fewer than two rows, the
     *                                    rows differ in column count, or the
     *                                    cells differ in value count
     */
    public static function twoWay(array ...$data)
    {
        // Must have at least two rows (two types of factor A)
        $r = count($data);
        if ($r < 2) {
            throw new Exception\BadDataException('Must have at least two rows (two types of factor A)');
        }

        // Every row must have the same number of columns (levels of factor B)
        $c = count($data[0]);
        for ($i = 1; $i < $r; $i++) {
            if (count($data[$i]) !== $c) {
                throw new Exception\BadDataException('All samples must have the same number of the second factor B');
            }
        }

        // Every AB cell must hold the same number of values
        $v = count($data[0][0]);
        for ($i = 0; $i < $r; $i++) {
            for ($j = 0; $j < $c; $j++) {
                if (count($data[$i][$j]) !== $v) {
                    throw new Exception\BadDataException('Each AB factor interaction must have the same number of values');
                }
            }
        }

        // Builds the descriptive summary shared by every table in the report.
        $summarize = static function (array $elements) {
            return [
                'n'        => count($elements),
                'sum'      => array_sum($elements),
                'mean'     => Average::mean($elements),
                'SS'       => RandomVariable::sumOfSquares($elements),
                'variance' => Descriptive::sampleVariance($elements),
                'sd'       => Descriptive::sd($elements),
                'sem'      => RandomVariable::standardErrorOfTheMean($elements),
            ];
        };

        // Aggregates for all elements, rows (factor A), and columns (factor B)
        $all_elements = [];
        $A_elements   = [];
        $B_elements   = [];
        $summary_AB   = [];

        // Summarize each AB cell while pooling values per row and overall
        foreach ($data as $A => $Bs) {
            $A_elements[$A] = [];
            foreach ($Bs as $B => $values) {
                $all_elements   = array_merge($all_elements, $values);
                $A_elements[$A] = array_merge($A_elements[$A], $values);

                // 'n' is the size of this cell's sample (its value count),
                // not the number of factor B columns.
                $summary_AB[$A][$B] = $summarize($values);
            }
        }

        // Pool values per column (factor B)
        for ($B = 0; $B < $c; $B++) {
            $B_elements[$B] = [];
            foreach ($data as $factor1s) {
                $B_elements[$B] = array_merge($B_elements[$B], $factor1s[$B]);
            }
        }

        // array_map with a single array keeps the keys, so the summaries stay
        // indexed by factor level.
        $summary_A     = array_map($summarize, $A_elements);
        $summary_B     = array_map($summarize, $B_elements);
        $summary_total = $summarize($all_elements);
        $μ             = $summary_total['mean'];

        // Sum of squares for a factor: ∑ n(x̄ - μ)² over that factor's levels
        $between = static function (array $summary) use ($μ) {
            return $summary['n'] * ($summary['mean'] - $μ) ** 2;
        };
        $SSA = array_sum(array_map($between, $summary_A));
        $SSB = array_sum(array_map($between, $summary_B));

        // Sum of squares within (error) uses each value's own cell mean;
        // sum of squares total uses the grand mean.
        $SSW = 0;
        $SST = 0;
        foreach ($data as $A => $Bs) {
            foreach ($Bs as $B => $values) {
                $cell_mean = $summary_AB[$A][$B]['mean'];
                foreach ($values as $value) {
                    $SSW += ($value - $cell_mean) ** 2;
                    $SST += ($value - $μ) ** 2;
                }
            }
        }

        // Sum of squares AB interaction is whatever the other terms leave over
        $SSAB = $SST - $SSA - $SSB - $SSW;

        // Degrees of freedom
        $dfA  = $r - 1;
        $dfB  = $c - 1;
        $dfAB = ($r - 1) * ($c - 1);
        $dfW  = $summary_total['n'] - $r * $c;
        $dfT  = $summary_total['n'] - 1;

        // Mean squares
        $MSA  = $SSA / $dfA;
        $MSB  = $SSB / $dfB;
        $MSAB = $SSAB / $dfAB;
        $MSW  = $SSW / $dfW;

        // F test statistics
        $FA  = $MSA / $MSW;
        $FB  = $MSB / $MSW;
        $FAB = $MSAB / $MSW;

        // P values
        $PA  = F::above($FA, $dfA, $dfW);
        $PB  = F::above($FB, $dfB, $dfW);
        $PAB = F::above($FAB, $dfAB, $dfW);

        // Return ANOVA report
        return [
            'ANOVA' => [
                'factorA'     => ['SS' => $SSA, 'df' => $dfA, 'MS' => $MSA, 'F' => $FA, 'P' => $PA],
                'factorB'     => ['SS' => $SSB, 'df' => $dfB, 'MS' => $MSB, 'F' => $FB, 'P' => $PB],
                'interaction' => ['SS' => $SSAB, 'df' => $dfAB, 'MS' => $MSAB, 'F' => $FAB, 'P' => $PAB],
                'error'       => ['SS' => $SSW, 'df' => $dfW, 'MS' => $MSW],
                'total'       => ['SS' => $SST, 'df' => $dfT],
            ],
            'total_summary'       => $summary_total,
            'summary_factorA'     => $summary_A,
            'summary_factorB'     => $summary_B,
            'summary_interaction' => $summary_AB,
        ];
    }