blob: 0812f9546fc85eb18960539a9912df22029407b1 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
#!/usr/bin/python
import httplib, urllib, re, sys
from BeautifulSoup import BeautifulSoup
# Query Google Scholar for the search terms given on the command line and
# print an h-index computed from the "Cited by N" counts found in the
# result page.
#
# Usage: script.py TERM [TERM ...]
terms = sys.argv[1:]
limit = 100  # number of results requested via the 'num' query parameter

# urlencode() already turns spaces into '+', so the terms are joined with a
# space. Joining with '+' would be escaped to '%2B' and sent as a literal plus.
params = urllib.urlencode({'q': " ".join(terms), 'num': limit})
headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; WindowsNT)'}
url = '/scholar' + "?" + params

# Compiled once; only digits are captured so int() below cannot fail.
cited_by = re.compile(r"Cited by (\d+)")

cites = []
conn = httplib.HTTPConnection('scholar.google.com')
try:
    # A GET carries no body, so pass None rather than an empty dict.
    conn.request("GET", url, None, headers)
    resp = conn.getresponse()
    if resp.status == 200:
        html = resp.read()
        # Drop any non-ASCII bytes before handing the page to the parser.
        html = html.decode('ascii', 'ignore')
        soup = BeautifulSoup(html)
        for record in soup('h3', {'class': 'r'}):
            print("we have a match!")
            match = cited_by.search(str(record))
            if match is not None:
                cites.append(int(match.group(1)))
    else:
        print('Error: ')
        print('%s %s' % (resp.status, resp.reason))
finally:
    # Always release the socket, even if the request or parsing fails.
    conn.close()

# h-index: with counts in descending order, h grows by one for every entry
# whose citation count exceeds the number of entries already counted.
cites.sort(reverse=True)
h = 0
for cite in cites:
    if cite > h:
        h += 1
print(h)
|