RosettaCodeData/Task/P-value-correction/Rust/p-value-correction.rs

233 lines
7.6 KiB
Rust

use std::iter;
#[rustfmt::skip]
const PVALUES:[f64;50] = [
4.533_744e-01, 7.296_024e-01, 9.936_026e-02, 9.079_658e-02, 1.801_962e-01,
8.752_257e-01, 2.922_222e-01, 9.115_421e-01, 4.355_806e-01, 5.324_867e-01,
4.926_798e-01, 5.802_978e-01, 3.485_442e-01, 7.883_130e-01, 2.729_308e-01,
8.502_518e-01, 4.268_138e-01, 6.442_008e-01, 3.030_266e-01, 5.001_555e-02,
3.194_810e-01, 7.892_933e-01, 9.991_834e-01, 1.745_691e-01, 9.037_516e-01,
1.198_578e-01, 3.966_083e-01, 1.403_837e-02, 7.328_671e-01, 6.793_476e-02,
4.040_730e-03, 3.033_349e-04, 1.125_147e-02, 2.375_072e-02, 5.818_542e-04,
3.075_482e-04, 8.251_272e-03, 1.356_534e-03, 1.360_696e-02, 3.764_588e-04,
1.801_145e-05, 2.504_456e-07, 3.310_253e-02, 9.427_839e-03, 8.791_153e-04,
2.177_831e-04, 9.693_054e-04, 6.610_250e-05, 2.900_813e-02, 5.735_490e-03
];
#[derive(Debug)]
enum CorrectionType {
BenjaminiHochberg,
BenjaminiYekutieli,
Bonferroni,
Hochberg,
Holm,
Hommel,
Sidak,
}
enum SortDirection {
Increasing,
Decreasing,
}
/// orders **input** vector by value and multiplies with **multiplier** vector
/// Finally returns the multiplied values in the original order of **input**
fn ordered_multiply(input: &[f64], multiplier: &[f64], direction: &SortDirection) -> Vec<f64> {
let order_by_value = match direction {
SortDirection::Increasing => {
|a: &(f64, usize), b: &(f64, usize)| b.0.partial_cmp(&a.0).unwrap()
}
SortDirection::Decreasing => {
|a: &(f64, usize), b: &(f64, usize)| a.0.partial_cmp(&b.0).unwrap()
}
};
let cmp_minmax = match direction {
SortDirection::Increasing => |a: f64, b: f64| a.gt(&b),
SortDirection::Decreasing => |a: f64, b: f64| a.lt(&b),
};
// add original order index
let mut input_indexed = input
.iter()
.enumerate()
.map(|(idx, &p_value)| (p_value, idx))
.collect::<Vec<_>>();
// order by value desc/asc
input_indexed.sort_unstable_by(order_by_value);
// do the multiplication in place, clamp it at 1.0,
// keep the original index in place
for i in 0..input_indexed.len() {
input_indexed[i] = (
f64::min(1.0, input_indexed[i].0 * multiplier[i]),
input_indexed[i].1,
);
}
// make vector strictly monotonous increasing/decreasing in place
for i in 1..input_indexed.len() {
if cmp_minmax(input_indexed[i].0, input_indexed[i - 1].0) {
input_indexed[i] = (input_indexed[i - 1].0, input_indexed[i].1);
}
}
// re-sort back to original order
input_indexed.sort_unstable_by(|a: &(f64, usize), b: &(f64, usize)| a.1.cmp(&b.1));
// remove ordering index
let (resorted, _): (Vec<_>, Vec<_>) = input_indexed.iter().cloned().unzip();
resorted
}
#[allow(clippy::cast_precision_loss)]
fn hommel(input: &[f64]) -> Vec<f64> {
// using algorith described:
// http://stat.wharton.upenn.edu/~steele/Courses/956/ResourceDetails/MultipleComparision/Writght92.pdf
// add original order index
let mut input_indexed = input
.iter()
.enumerate()
.map(|(idx, &p_value)| (p_value, idx))
.collect::<Vec<_>>();
// order by value asc
input_indexed
.sort_unstable_by(|a: &(f64, usize), b: &(f64, usize)| a.0.partial_cmp(&b.0).unwrap());
let (p_values, order): (Vec<_>, Vec<_>) = input_indexed.iter().cloned().unzip();
let n = input.len();
// initial minimal n*p/i values
// get the smalles of these values
let min_result = (0..n)
.map(|i| ((p_values[i] * n as f64) / (i + 1) as f64))
.fold(1. / 0. /* -inf */, f64::min);
// // initialize result vector with minimal values
let mut result = iter::repeat(min_result).take(n).collect::<Vec<_>>();
for m in (2..n).rev() {
let cmin: f64;
let m_as_float = m as f64;
let mut a = p_values.clone();
// println!("\nn: {}", m);
{
// split p-values into two group
let (_, second) = p_values.split_at(n - m + 1);
// calculate minumum of m*p/i for this second group
cmin = second
.iter()
.zip(2..=m)
.map(|(p, i)| (m_as_float * p) / i as f64)
.fold(1. / 0. /* inf */, f64::min);
}
// replace p values if p<cmin in the second group
((n - m + 1)..n).for_each(|i| a[i] = a[i].max(cmin));
// replace p values if min(cmin, m*p) > p
(0..=(n - m)).for_each(|i| a[i] = a[i].max(f64::min(cmin, m_as_float * p_values[i])));
// store in the result vector if any adjusted p is higher than the current one
(0..n).for_each(|i| result[i] = result[i].max(a[i]));
}
// re-sort into the original order
let mut result = result
.into_iter()
.zip(order.into_iter())
.map(|(p, idx)| (p, idx))
.collect::<Vec<_>>();
result.sort_unstable_by(|a: &(f64, usize), b: &(f64, usize)| a.1.cmp(&b.1));
let (result, _): (Vec<_>, Vec<_>) = result.iter().cloned().unzip();
result
}
#[allow(clippy::cast_precision_loss)]
fn p_value_correction(p_values: &[f64], ctype: &CorrectionType) -> Vec<f64> {
let p_vec = p_values.to_vec();
if p_values.is_empty() {
return p_vec;
}
let fsize = p_values.len() as f64;
match ctype {
CorrectionType::BenjaminiHochberg => {
let multiplier = (0..p_values.len())
.map(|index| fsize / (fsize - index as f64))
.collect::<Vec<_>>();
ordered_multiply(&p_vec, &multiplier, &SortDirection::Increasing)
}
CorrectionType::BenjaminiYekutieli => {
let q: f64 = (1..=p_values.len()).map(|index| 1. / index as f64).sum();
let multiplier = (0..p_values.len())
.map(|index| q * fsize / (fsize - index as f64))
.collect::<Vec<_>>();
ordered_multiply(&p_vec, &multiplier, &SortDirection::Increasing)
}
CorrectionType::Bonferroni => p_vec
.iter()
.map(|p| f64::min(p * fsize, 1.0))
.collect::<Vec<_>>(),
CorrectionType::Hochberg => {
let multiplier = (0..p_values.len())
.map(|index| 1. + index as f64)
.collect::<Vec<_>>();
ordered_multiply(&p_vec, &multiplier, &SortDirection::Increasing)
}
CorrectionType::Holm => {
let multiplier = (0..p_values.len())
.map(|index| fsize - index as f64)
.collect::<Vec<_>>();
ordered_multiply(&p_vec, &multiplier, &SortDirection::Decreasing)
}
CorrectionType::Sidak => p_vec
.iter()
.map(|x| 1. - (1. - x).powf(fsize))
.collect::<Vec<_>>(),
CorrectionType::Hommel => hommel(&p_vec),
}
}
// prints array into a nice table, max 5 floats/row
fn array_to_string(a: &[f64]) -> String {
a.chunks(5)
.enumerate()
.map(|(index, e)| {
format!(
"[{:>2}]: {}",
index * 5,
e.iter()
.map(|x| format!("{:>1.10}", x))
.collect::<Vec<_>>()
.join(", ")
)
})
.collect::<Vec<_>>()
.join("\n")
}
fn main() {
let ctypes = [
CorrectionType::BenjaminiHochberg,
CorrectionType::BenjaminiYekutieli,
CorrectionType::Bonferroni,
CorrectionType::Hochberg,
CorrectionType::Holm,
CorrectionType::Sidak,
CorrectionType::Hommel,
];
for ctype in &ctypes {
println!("\n{:?}:", ctype);
println!("{}", array_to_string(&p_value_correction(&PVALUES, ctype)));
}
}