use std::iter; #[rustfmt::skip] const PVALUES:[f64;50] = [ 4.533_744e-01, 7.296_024e-01, 9.936_026e-02, 9.079_658e-02, 1.801_962e-01, 8.752_257e-01, 2.922_222e-01, 9.115_421e-01, 4.355_806e-01, 5.324_867e-01, 4.926_798e-01, 5.802_978e-01, 3.485_442e-01, 7.883_130e-01, 2.729_308e-01, 8.502_518e-01, 4.268_138e-01, 6.442_008e-01, 3.030_266e-01, 5.001_555e-02, 3.194_810e-01, 7.892_933e-01, 9.991_834e-01, 1.745_691e-01, 9.037_516e-01, 1.198_578e-01, 3.966_083e-01, 1.403_837e-02, 7.328_671e-01, 6.793_476e-02, 4.040_730e-03, 3.033_349e-04, 1.125_147e-02, 2.375_072e-02, 5.818_542e-04, 3.075_482e-04, 8.251_272e-03, 1.356_534e-03, 1.360_696e-02, 3.764_588e-04, 1.801_145e-05, 2.504_456e-07, 3.310_253e-02, 9.427_839e-03, 8.791_153e-04, 2.177_831e-04, 9.693_054e-04, 6.610_250e-05, 2.900_813e-02, 5.735_490e-03 ]; #[derive(Debug)] enum CorrectionType { BenjaminiHochberg, BenjaminiYekutieli, Bonferroni, Hochberg, Holm, Hommel, Sidak, } enum SortDirection { Increasing, Decreasing, } /// orders **input** vector by value and multiplies with **multiplier** vector /// Finally returns the multiplied values in the original order of **input** fn ordered_multiply(input: &[f64], multiplier: &[f64], direction: &SortDirection) -> Vec { let order_by_value = match direction { SortDirection::Increasing => { |a: &(f64, usize), b: &(f64, usize)| b.0.partial_cmp(&a.0).unwrap() } SortDirection::Decreasing => { |a: &(f64, usize), b: &(f64, usize)| a.0.partial_cmp(&b.0).unwrap() } }; let cmp_minmax = match direction { SortDirection::Increasing => |a: f64, b: f64| a.gt(&b), SortDirection::Decreasing => |a: f64, b: f64| a.lt(&b), }; // add original order index let mut input_indexed = input .iter() .enumerate() .map(|(idx, &p_value)| (p_value, idx)) .collect::>(); // order by value desc/asc input_indexed.sort_unstable_by(order_by_value); // do the multiplication in place, clamp it at 1.0, // keep the original index in place for i in 0..input_indexed.len() { input_indexed[i] = ( f64::min(1.0, input_indexed[i].0 * multiplier[i]), input_indexed[i].1, ); } // make vector strictly monotonous increasing/decreasing in place for i in 1..input_indexed.len() { if cmp_minmax(input_indexed[i].0, input_indexed[i - 1].0) { input_indexed[i] = (input_indexed[i - 1].0, input_indexed[i].1); } } // re-sort back to original order input_indexed.sort_unstable_by(|a: &(f64, usize), b: &(f64, usize)| a.1.cmp(&b.1)); // remove ordering index let (resorted, _): (Vec<_>, Vec<_>) = input_indexed.iter().cloned().unzip(); resorted } #[allow(clippy::cast_precision_loss)] fn hommel(input: &[f64]) -> Vec { // using algorith described: // http://stat.wharton.upenn.edu/~steele/Courses/956/ResourceDetails/MultipleComparision/Writght92.pdf // add original order index let mut input_indexed = input .iter() .enumerate() .map(|(idx, &p_value)| (p_value, idx)) .collect::>(); // order by value asc input_indexed .sort_unstable_by(|a: &(f64, usize), b: &(f64, usize)| a.0.partial_cmp(&b.0).unwrap()); let (p_values, order): (Vec<_>, Vec<_>) = input_indexed.iter().cloned().unzip(); let n = input.len(); // initial minimal n*p/i values // get the smalles of these values let min_result = (0..n) .map(|i| ((p_values[i] * n as f64) / (i + 1) as f64)) .fold(1. / 0. /* -inf */, f64::min); // // initialize result vector with minimal values let mut result = iter::repeat(min_result).take(n).collect::>(); for m in (2..n).rev() { let cmin: f64; let m_as_float = m as f64; let mut a = p_values.clone(); // println!("\nn: {}", m); { // split p-values into two group let (_, second) = p_values.split_at(n - m + 1); // calculate minumum of m*p/i for this second group cmin = second .iter() .zip(2..=m) .map(|(p, i)| (m_as_float * p) / i as f64) .fold(1. / 0. /* inf */, f64::min); } // replace p values if p p (0..=(n - m)).for_each(|i| a[i] = a[i].max(f64::min(cmin, m_as_float * p_values[i]))); // store in the result vector if any adjusted p is higher than the current one (0..n).for_each(|i| result[i] = result[i].max(a[i])); } // re-sort into the original order let mut result = result .into_iter() .zip(order.into_iter()) .map(|(p, idx)| (p, idx)) .collect::>(); result.sort_unstable_by(|a: &(f64, usize), b: &(f64, usize)| a.1.cmp(&b.1)); let (result, _): (Vec<_>, Vec<_>) = result.iter().cloned().unzip(); result } #[allow(clippy::cast_precision_loss)] fn p_value_correction(p_values: &[f64], ctype: &CorrectionType) -> Vec { let p_vec = p_values.to_vec(); if p_values.is_empty() { return p_vec; } let fsize = p_values.len() as f64; match ctype { CorrectionType::BenjaminiHochberg => { let multiplier = (0..p_values.len()) .map(|index| fsize / (fsize - index as f64)) .collect::>(); ordered_multiply(&p_vec, &multiplier, &SortDirection::Increasing) } CorrectionType::BenjaminiYekutieli => { let q: f64 = (1..=p_values.len()).map(|index| 1. / index as f64).sum(); let multiplier = (0..p_values.len()) .map(|index| q * fsize / (fsize - index as f64)) .collect::>(); ordered_multiply(&p_vec, &multiplier, &SortDirection::Increasing) } CorrectionType::Bonferroni => p_vec .iter() .map(|p| f64::min(p * fsize, 1.0)) .collect::>(), CorrectionType::Hochberg => { let multiplier = (0..p_values.len()) .map(|index| 1. + index as f64) .collect::>(); ordered_multiply(&p_vec, &multiplier, &SortDirection::Increasing) } CorrectionType::Holm => { let multiplier = (0..p_values.len()) .map(|index| fsize - index as f64) .collect::>(); ordered_multiply(&p_vec, &multiplier, &SortDirection::Decreasing) } CorrectionType::Sidak => p_vec .iter() .map(|x| 1. - (1. - x).powf(fsize)) .collect::>(), CorrectionType::Hommel => hommel(&p_vec), } } // prints array into a nice table, max 5 floats/row fn array_to_string(a: &[f64]) -> String { a.chunks(5) .enumerate() .map(|(index, e)| { format!( "[{:>2}]: {}", index * 5, e.iter() .map(|x| format!("{:>1.10}", x)) .collect::>() .join(", ") ) }) .collect::>() .join("\n") } fn main() { let ctypes = [ CorrectionType::BenjaminiHochberg, CorrectionType::BenjaminiYekutieli, CorrectionType::Bonferroni, CorrectionType::Hochberg, CorrectionType::Holm, CorrectionType::Sidak, CorrectionType::Hommel, ]; for ctype in &ctypes { println!("\n{:?}:", ctype); println!("{}", array_to_string(&p_value_correction(&PVALUES, ctype))); } }