26 lines
788 B
Clojure
26 lines
788 B
Clojure
(ns inverted-index.core
|
|
(:require [clojure.set :as sets]
|
|
[clojure.java.io :as io]))
|
|
|
|
;; Java regex for a raw term: a maximal run of `\w` characters.
;; NOTE: with no Unicode flag set, Java's `\w` is [a-zA-Z_0-9], i.e. ASCII
;; letters, digits AND underscore -- non-ASCII letters are not matched.
(def pattern #"\w+")
|
|
(defn normalize
  "Normalizes a raw term by lower-casing it.

  match - the raw term, a String.
  Returns the lower-cased String.

  Lower-casing is done with Locale/ROOT so the result does not depend on the
  JVM's default locale (under a Turkish locale, for instance, \"I\" would
  otherwise become a dotless \"ı\" and index keys would differ between
  machines). The ^String hint avoids a reflective method call."
  [match]
  (.toLowerCase ^String match java.util.Locale/ROOT))
|
|
|
|
(defn term-seq
  "Returns a lazy sequence of the normalized terms found in text, in order of
  appearance. Raw terms are the successive matches of `pattern`; each one is
  passed through `normalize`."
  [text]
  (->> text
       (re-seq pattern)
       (map normalize)))
|
|
|
|
(defn set-assoc
  "Produces map with v added to the set associated with key k in map m.

  m - the map to extend (may be nil, in which case a new map is produced).
  k - the key whose set should receive v.
  v - the value to add.

  A key that is absent, or present with a nil value, is treated as holding
  the empty set, so the value stored under k is always a set."
  [m k v]
  ;; `(get m k #{})` would return nil for a key explicitly mapped to nil, and
  ;; `(conj nil v)` builds a list; `or` covers both the absent and nil cases.
  (let [current (or (get m k) #{})]
    (assoc m k (conj current v))))
|
|
|
|
(defn index-file
  "Returns index extended with every term occurring in file.

  index - map from term to the set of files containing it.
  file  - anything `clojure.java.io/reader` accepts; it is also the value
          recorded in each term's set.

  The file is read line by line and the reader is closed before returning."
  [index file]
  (with-open [rdr (io/reader file)]
    (let [terms       (mapcat term-seq (line-seq rdr))
          add-posting (fn [acc term] (set-assoc acc term file))]
      ;; reduce is eager, so the lazy line/term sequences are fully consumed
      ;; while the reader is still open.
      (reduce add-posting index terms))))
|
|
|
|
(defn make-index
  "Builds an index over files by folding `index-file` across them, starting
  from an empty map. Returns the resulting map from term to set of files."
  [files]
  (loop [idx       {}
         remaining (seq files)]
    (if remaining
      (recur (index-file idx (first remaining)) (next remaining))
      idx)))
|
|
|
|
(defn search
  "Returns the set of files that contain every term of query.

  index - map from term to set of files, as built by `make-index`.
  query - a string; it is split into terms with `term-seq`, so it is
          tokenized and normalized the same way as indexed text.

  Always returns a set: the empty set when the query contains no terms or
  when any term is missing from the index."
  [index query]
  ;; A term absent from the index contributes the empty set rather than nil,
  ;; so the intersection is a set (and empty) instead of nil.
  (let [postings (map (fn [term] (or (index term) #{}))
                      (term-seq query))]
    ;; sets/intersection needs at least one argument; an empty query would
    ;; otherwise throw an ArityException.
    (if (seq postings)
      (apply sets/intersection postings)
      #{})))
|