-- Shannon entropy of a string, implemented as DuckDB macros (plaintext listing: 17 lines, 526 B).
-- h2e: Shannon entropy, in bits, of the distribution described by a histogram.
--
-- Parameters:
--   h  a DuckDB histogram, i.e. a map from each distinct value to its count
--   n  the total number of observations; should equal map_values(h).list_sum()
--
-- Returns sum(x * log2(n / x)) / n over the counts x in h, which is the same
-- quantity as -sum((x / n) * log2(x / n)).
--
-- The logarithm is taken of n / x rather than negating log2(x / n): negating
-- a zero sum produces a negative zero, so a histogram with a single distinct
-- value would otherwise report -0.0 instead of 0.0.
create or replace function h2e(h, n) as (
    list_sum(
        list_transform(map_values(h), x -> x * log2(n / x))
    ) / n
);
|
|
|
|
-- shannon_entropy: entropy of the characters of str, as computed by h2e.
--
-- str is normally a string; any other type is cast to varchar first.
create or replace function shannon_entropy(str) as (
    with symbols as (
        -- one row per element produced by splitting the text on the empty separator
        select sym
        from unnest(string_split(cast(str as varchar), '')) as t(sym)
    ),
    stats as (
        -- a single row holding both the per-symbol counts and the total
        select
            histogram(sym) as counts,
            count(*) as total
        from symbols
    )
    select h2e(counts, total)
    from stats
);
|
|
|
|
-- Example call with a non-string argument: the integer is cast to varchar inside
-- shannon_entropy. Its digits occur 1, 2, 3 and 4 times out of 10, for which
-- the entropy formula gives roughly 1.846 bits.
select shannon_entropy(1223334444);
|