import { jStat } from 'jstat'
import { getValueAcrossEntities } from '../calculatedValues'
import { getGroupedValues } from './getAggregatedValues'
import getDataPoints from './getDataPoints'

import type { EntityObject } from '~/form-brain2'
import type { Experiment } from '../types'

/** Result of a two-sample t-test comparing group A against group B. */
export interface TTestValues {
  /** Two-sided p-value, derived from the Student's t distribution with `dof` degrees of freedom. */
  pValue: number
  /** t statistic; its sign follows `meanA - meanB`. */
  tValue: number
  /** Arithmetic mean of the values in group A. */
  meanA: number
  /** Arithmetic mean of the values in group B. */
  meanB: number
  /** Degrees of freedom: number of values in A plus number of values in B, minus 2. */
  dof: number
}

/** Arguments accepted by `calculateTTestValues`. */
interface Params {
  /** Experiment whose data is analysed; passed through to the data point and grouping helpers. */
  experiment: Experiment
  /** Key of the first group, used to look up its values in the grouped value sets. */
  groupA: string
  /** Key of the second group, used to look up its values in the grouped value sets. */
  groupB: string
  /** Entities the data points and the `outlierSubjectIds` value are read from. */
  otherEntities: EntityObject[]
}

const
  /**
   * Runs a two-sided, two-sample Student's t-test using the pooled variance of
   * both groups (i.e. the variant that assumes equal variances).
   *
   * Inputs are validated before the statistics that depend on them are
   * computed, so invalid input is reported with a targeted message instead of
   * surfacing later as a NaN p-value.
   *
   * @param groupA - Values of the first group.
   * @param groupB - Values of the second group.
   * @returns Means of both groups, the t statistic, the two-sided p-value and the degrees of freedom.
   * @throws Error when there are fewer than one degree of freedom, when either
   *   group is empty, when the pooled variance is zero, or when the resulting
   *   p-value is not a number.
   *
   * @example
   * // Sanity check: t ≈ -1.69 with 18 degrees of freedom, p slightly above 0.1
   * twoSampleTTest([1,2,2,3,3,4,4,5,5,6], [1,2,4,5,5,5,6,6,7,9])
   */
  twoSampleTTest = (groupA: number[], groupB: number[]): TTestValues => {
    const
      nA = groupA.length,
      nB = groupB.length,
      dof = nA + nB - 2

    if (isNaN(dof) || dof < 1) {
      throw new Error(`Cannot run test because there are too few measurements in the chosen groups (${nA + nB} aggregated measurements, ${dof} degrees of freedom)`)
    }

    // The degrees of freedom can be sufficient while one group is empty (e.g. 0 and 3
    // values). The mean of an empty group is NaN, so reject that case explicitly.
    if (nA === 0 || nB === 0) {
      throw new Error(`Cannot run test because one of the chosen groups has no aggregated measurements (${nA} and ${nB} aggregated measurements)`)
    }

    const
      meanA = jStat.mean(groupA),
      meanB = jStat.mean(groupB),
      // Pooled variance: summed squared deviations from each group's own mean, divided by the degrees of freedom
      s2 = (
        (
          jStat.sum(jStat.pow(jStat.subtract(groupA, meanA), 2)) +
          jStat.sum(jStat.pow(jStat.subtract(groupB, meanB), 2))
        ) /
        (dof)
      )

    if (s2 <= 0) {
      throw new Error('Cannot run test because there is no variation in either group (all aggregated measurements for the group are the same)')
    }

    const
      tValue = (
        (meanA - meanB) /
        Math.sqrt(
          s2 / nA +
          s2 / nB
        )
      ),
      pValue = jStat.studentt.cdf(-Math.abs(tValue), dof) * 2 // two-sided p-value: twice the lower tail at -|t|

    if (isNaN(pValue)) {
      throw new Error(`Cannot run test, resulting p value is not a number (means: ${meanA} and ${meanB}, degrees of freedom: ${dof}, t: ${tValue})`)
    }

    return { meanA, meanB, tValue, pValue, dof }
  },
  /**
   * Collects the values of two groups of an experiment and compares them with
   * a two-sample t-test.
   *
   * The data points come from `getDataPoints`, the `outlierSubjectIds` value is
   * read across `otherEntities` (defaulting to an empty list), and both are
   * handed to `getGroupedValues`, whose `valueSetsByTreatment` is indexed by
   * `groupA` and `groupB`.
   *
   * @param params - Experiment, the two group keys and the entities to read from.
   * @returns The t-test result for the `value` of every entry in both groups.
   * @throws Error when either group is missing or empty, or when the t-test itself cannot be run.
   */
  calculateTTestValues = ({ experiment, groupA, groupB, otherEntities }: Params): TTestValues => {
    const
      { dataPoints, subjectsById } = getDataPoints(otherEntities, experiment),
      outlierSubjectIds = (getValueAcrossEntities('outlierSubjectIds', otherEntities, true) as string[] | undefined) ?? [],
      { valueSetsByTreatment } = getGroupedValues(dataPoints, experiment, otherEntities, subjectsById, outlierSubjectIds),
      valueSetA = valueSetsByTreatment[groupA],
      valueSetB = valueSetsByTreatment[groupB]

    // An empty value set is truthy, so check the length as well as the presence
    if (!valueSetA?.length) {
      throw new Error(`Cannot run test for group "${groupA}" because it has no data`)
    }

    if (!valueSetB?.length) {
      throw new Error(`Cannot run test for group "${groupB}" because it has no data`)
    }

    return twoSampleTTest(valueSetA.map(o => o.value), valueSetB.map(o => o.value))
  }

export default calculateTTestValues
