64 lines
1.4 KiB
Ruby
64 lines
1.4 KiB
Ruby
require 'open-uri'
|
|
require 'hpricot'
|
|
|
|
# One scraped search hit: the target URL, the title text of its link, and
# the text of its abstract. Any field may be nil if the page lacked it.
SearchResult = Struct.new(:url, :title, :content)
|
|
|
|
# Incrementally scrapes web-search results for one term from
# ca.search.yahoo.com and hands them out one at a time via #next_result.
class SearchYahoo
  # Endpoint every request is sent to; the query string is filled in per fetch.
  ENDPOINT = { host: 'ca.search.yahoo.com', port: 80, path: '/search' }.freeze

  # Exact class attribute carried by the title link inside a result block.
  TITLE_LINK_CLASS = 'yschttl spt'

  # @param term [String] the text to search for
  def initialize(term)
    @term = term
    # Sent as the `b` query parameter and advanced by 10 per fetch, so this is
    # presumably the 1-based offset of the first hit rather than a page
    # number -- TODO confirm against the search endpoint.
    @page = 1
    # nil means "nothing fetched yet"; [] means "current page is used up".
    @results = nil
    @url = URI::HTTP.build(ENDPOINT)
  end

  # Returns the next search hit, fetching the first or the following page on
  # demand.
  #
  # @return [SearchResult, nil] nil when the page just fetched held no hits
  def next_result
    if @results.nil?
      @results = []
      fetch_results
    elsif @results.empty?
      next_page
    end
    @results.shift
  end

  # Downloads the page at the current offset and appends its hits to the
  # pending results.
  #
  # @raise [OpenURI::HTTPError, SocketError] on network/HTTP failure (propagated)
  def fetch_results
    # encode_www_form percent-encodes reserved characters such as `&` and `=`
    # inside the term, so the term can never leak into the `b` parameter.
    @url.query = URI.encode_www_form(p: @term, b: @page)
    # URI#open comes from open-uri; plain Kernel#open no longer accepts URLs
    # on Ruby 3.0+.
    doc = @url.open { |io| Hpricot(io) }
    parse_html(doc)
  end

  # Advances the offset by one page worth of hits (10) and fetches it.
  def next_page
    @page += 10
    fetch_results
  end

  # Extracts every result block from a parsed page and appends one
  # SearchResult per block to the pending results.
  #
  # @param doc [#search] parsed document (an Hpricot document in production)
  def parse_html(doc)
    doc.search('div#main').search('div').each do |div|
      # Result containers are the divs whose class begins with "res".
      next unless css_class(div).to_s.start_with?('res')

      result = SearchResult.new
      div.search('a').each do |link|
        next unless css_class(link) == TITLE_LINK_CLASS

        # If several title links exist, the last one wins.
        result.url = link.get_attribute('href')
        result.title = link.inner_text
      end
      div.search('div').each do |abstract|
        next unless css_class(abstract).to_s.include?('abstr')

        result.content = abstract.inner_text
      end
      @results << result
    end
  end

  private

  # Returns the element's class attribute, or nil when it has none.
  def css_class(element)
    element.get_attribute('class') if element.has_attribute?('class')
  end
end
|
|
|
|
# Demo: print up to the first 15 hits for the term "test", numbered from 1.
search = SearchYahoo.new("test")
1.upto(15) do |rank|
  hit = search.next_result
  # next_result yields nil once a fetched page contains no hits; stop instead
  # of calling accessors on nil.
  break if hit.nil?

  puts rank
  puts hit.title
  puts hit.url
  puts hit.content
  puts
end
|