blob: 6408f1e4c93aa5363e851658686ce68a835f00b2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
#!/usr/bin/python
from lxml import etree as et
from urllib import quote_plus,urlopen
def gsearch(q='', num=10, datelimit=''):
    """Run a Google web search and return the links found on the result page.

    Builds a ``http://google.com/search`` URL from the arguments, downloads
    the page, parses it with lxml's HTML parser and collects every anchor
    matched by the result XPath.

    Args:
        q: Search terms; URL-encoded with ``quote_plus`` before being placed
            in the ``as_q`` query parameter.
        num: Value sent as the ``num`` query parameter (presumably the
            number of results requested -- confirm against Google's API).
        datelimit: Value sent verbatim as the ``as_qdr`` query parameter
            (presumably a recency filter -- confirm against Google's API).

    Returns:
        A list of ``{'href': ..., 'text': ...}`` dicts, one per matched
        anchor, in the order the XPath query yields them. The list is empty
        when the XPath matches nothing.

    NOTE(review): this function previously hit a stray ``return tree``
    before the result list was built, so the link-collecting loop was
    unreachable. It now returns the list that loop was meant to produce.
    """
    searchurl = 'http://google.com/search?hl=en&as_q=%s&num=%s&as_qdr=%s' % (
        quote_plus(q), num, datelimit)

    response = urlopen(searchurl)
    try:
        results = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    tree = et.fromstring(results, et.HTMLParser())
    links = tree.xpath('/html/body[@id="gsr"]/div[@id="res"]/div[1]/h3/a')

    # Look the attribute up by name: relying on it being the first attribute
    # of the anchor breaks as soon as the markup orders attributes differently.
    return [{'href': a.get('href'), 'text': a.text} for a in links]
def gs(xpath, path="tests/science-direct-search-results.html"):
    """Evaluate an XPath expression against a saved HTML page.

    Reads the HTML file at ``path``, parses it with lxml's HTML parser and
    runs ``xpath`` against the resulting tree.

    Args:
        xpath: XPath expression passed unchanged to ``tree.xpath``.
        path: Location of the HTML file to parse. Defaults to the
            ScienceDirect search-results fixture under ``tests/``.

    Returns:
        Whatever ``tree.xpath(xpath)`` returns for the parsed document.
    """
    # Read raw bytes so the HTML parser can detect the document encoding
    # itself; the ``with`` block guarantees the file handle is closed.
    with open(path, "rb") as page:
        content = page.read()

    tree = et.fromstring(content, et.HTMLParser())
    return tree.xpath(xpath)
|