46 lines
1.1 KiB
Python
46 lines
1.1 KiB
Python
import urllib
|
|
import re
|
|
|
|
# Matches one HTML tag: '<', then any run of characters other than '<',
# up to the first '>'. Compiled once so fix() does not rebuild it per call.
_TAG_RE = re.compile(r'<[^<]*?>')


def fix(x):
    """Return *x* with HTML tags removed and ``&amp;`` decoded to ``&``.

    Args:
        x: a string fragment of HTML markup.

    Returns:
        The text of *x* with every ``<...>`` tag deleted and every
        ``&amp;`` entity replaced by a literal ampersand. Other entities
        are left as they are.
    """
    # The previous version replaced '&' with '&', which changed nothing;
    # the entity form is what actually needs decoding.
    return _TAG_RE.sub('', x).replace('&amp;', '&')
|
|
|
|
class YahooSearch(object):
    """Fetch one page of Yahoo search results and scrape it with a regex.

    Constructing an instance performs the HTTP request immediately; the
    response body is stored in ``content`` and parsed whenever
    ``getresults()`` / ``search_results`` is used.

    Attributes:
        query: the search terms exactly as passed in (not URL-encoded).
        page: the page number passed in (defaults to 1).
        url: the request URL built from ``query`` and ``page``.
        content: the body of the HTTP response.
        results: list produced by the most recent ``getresults()`` call
            (only present after that method has run).
    """

    # Pattern capturing (title, snippet, displayed URL) for each hit.
    # Compiled once at class creation instead of on every getresults() call.
    _RESULT_RE = re.compile(
        '<a class="yschttl spt" href=".+?">(.+?)</a></h3></div>(.+?)</div>.*?<span class=url>(.+?)</span>'
    )

    def __init__(self, query, page=1):
        """Build the search URL for *query* / *page* and download it.

        Args:
            query: search terms as plain text; they are URL-encoded here,
                so callers should not pre-encode them.
            page: page number; page 1 maps to ``b=1``, page 2 to ``b=11``.
        """
        self.query = query
        self.page = page
        self.url = self._build_url(query, page)
        self.content = self._fetch(self.url)

    @staticmethod
    def _build_url(query, page):
        """Return the search URL for *query* and *page* without any I/O."""
        try:
            from urllib import quote_plus  # Python 2
        except ImportError:
            from urllib.parse import quote_plus  # Python 3
        # The "b" parameter is (page - 1) * 10 + 1 -- presumably the 1-based
        # index of the first result on the page.
        first = (page - 1) * 10 + 1
        # Encode the query so spaces, '&', '=' etc. cannot corrupt the
        # query string.
        return "http://search.yahoo.com/search?p=%s&b=%s" % (quote_plus(query), first)

    @staticmethod
    def _fetch(url):
        """Download *url* and return its body, closing the connection."""
        try:
            from urllib import urlopen  # Python 2
        except ImportError:
            from urllib.request import urlopen  # Python 3
        response = urlopen(url)
        try:
            body = response.read()
        finally:
            response.close()
        if not isinstance(body, str):
            # Python 3 returns bytes, which the str regex cannot search.
            # NOTE(review): assumes the page is UTF-8 -- confirm.
            body = body.decode('utf-8', 'replace')
        return body

    def getresults(self):
        """Parse ``content`` and return a list of YahooResult objects.

        Each regex match yields a title, a snippet and a displayed URL; all
        three are cleaned with ``fix()`` before being wrapped. The list is
        also stored on ``self.results``. Every call re-parses ``content``
        and builds a new list.
        """
        self.results = [
            YahooResult(fix(title), fix(snippet), fix(shown_url))
            for title, snippet, shown_url in self._RESULT_RE.findall(self.content)
        ]
        return self.results

    def getnextpage(self):
        """Return a new YahooSearch for the same query, one page further.

        Creating that object downloads the next page straight away.
        """
        return YahooSearch(self.query, self.page + 1)

    # Read-only attribute-style access. Note that every read of
    # ``nextpage`` issues a new HTTP request.
    search_results = property(fget=getresults)
    nextpage = property(fget=getnextpage)
|
|
|
|
class YahooResult(object):
    """One search hit: a title, a content snippet and a URL.

    The three values are stored unchanged as attributes of the same names.
    """

    def __init__(self, title, content, url):
        """Store *title*, *content* and *url* on the instance."""
        self.title = title
        self.content = content
        self.url = url

    def __repr__(self):
        """Return a constructor-like representation for debugging."""
        return "%s(title=%r, content=%r, url=%r)" % (
            type(self).__name__, self.title, self.content, self.url)
|
|
|
|
# Usage: run this file as a script to print the title of every result on the
# first page for the query "test". The guard keeps the network request from
# firing when the module is merely imported.
if __name__ == "__main__":
    x = YahooSearch("test")
    for result in x.search_results:
        # A parenthesised single argument prints identically whether print
        # is a statement (Python 2) or a function (Python 3).
        print(result.title)
|