RosettaCodeData/Task/Entropy/DuckDB/entropy.duckdb

17 lines
526 B
Plaintext

-- h2e(h, n): Shannon entropy, in bits, of a frequency distribution.
--   h: a DuckDB histogram, i.e. a MAP from each distinct symbol to its count
--   n: the total number of observations, i.e. map_values(h).list_sum()
-- Evaluates -sum(x * log2(x / n)) / n over the counts x stored in h, which is
-- -sum(p * log2(p)) with p = x / n.
create or replace function h2e(h, n) as (
    -list_sum(
        list_transform(
            map_values(h),
            -- the cast keeps x / n a floating-point ratio even where "/" on
            -- two integer operands would truncate it to 0
            x -> x * log2(x / n::DOUBLE)
        )
    ) / n
);
-- shannon_entropy(str): Shannon entropy, in bits per character, of str.
-- str is normally a string; any other value is cast to VARCHAR first.
-- Relies on the h2e(h, n) macro for the actual entropy formula.
create or replace function shannon_entropy(str) as (
    with chars as (
        -- one row per character: splitting on '' breaks the text apart
        select ch
        from unnest(string_split(str::VARCHAR, '')) as u(ch)
    ),
    total as (
        -- number of characters in the text
        select count(*) as n
        from chars
    ),
    freq as (
        -- map from each distinct character to its number of occurrences
        select histogram(ch) as h
        from chars
    )
    -- freq and total each hold exactly one row, so the cross join yields one row
    select h2e(freq.h, total.n)
    from freq
    cross join total
);
-- Demo: the integer argument is cast to VARCHAR inside shannon_entropy, so this
-- measures the entropy of the digit string '1223334444' (about 1.846 bits).
select
    shannon_entropy(1223334444);