1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
import os, sys, sgmllib, cookielib, urllib, htmlentitydefs
# Command-line handling.
#   argv[1]  -> HTML file whose links are checked (defaults to the path below)
#   argv[2:] -> files that links are allowed to point at; None disables
#               the "unlisted target" check entirely
ref = "../html/gcode.html"
if len(sys.argv) > 1:
    ref = sys.argv[1]

targets = None
if len(sys.argv) > 2:
    targets = sys.argv[2:]
def get(attr, attrs, default=""):
    """Look up *attr* in a sequence of (name, value) pairs, ignoring case.

    The value of the first pair whose name matches is returned; when no
    pair matches, *default* is returned instead.
    """
    wanted = attr.lower()
    for key, value in attrs:
        if key.lower() != wanted:
            continue
        return value
    return default
class MetaHandler:
    """Mixin that records the charset declared by a <meta> tag.

    Classes using this mixin are expected to provide an ``encoding``
    attribute; it is overwritten when a
    ``<meta http-equiv="Content-Type" content="...; charset=...">`` tag
    is seen.
    """

    def do_meta(self, attrs):
        """Handle a <meta> start tag given its (name, value) attribute pairs."""
        equiv = get("http-equiv", attrs)
        # Compare case-insensitively so the customary "Content-Type"
        # spelling is recognised, not only the all-lowercase form.
        if not equiv or equiv.lower() != "content-type":
            return

        content = get("content", attrs)
        if not content:
            return
        words = cookielib.split_header_words([content])
        # split_header_words() returns an empty list when nothing could be
        # parsed out of the header value; there is no charset in that case.
        if not words:
            return

        encoding = get("charset", words[0])
        if not encoding:
            return
        # A declared ASCII charset is decoded as ISO-8859-1 instead.
        if encoding.upper() in ("ASCII", "US-ASCII"):
            encoding = "ISO-8859-1"
        self.encoding = encoding
class get_refs(sgmllib.SGMLParser, MetaHandler):
    """SGML parser that collects the targets of all <a href="..."> links.

    After feeding a document, ``refs`` holds the set of href values,
    decoded with the document's declared encoding (when one was seen
    before the link) and with URL %-escapes removed.
    """

    entitydefs = htmlentitydefs.entitydefs

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.refs = set()
        self.encoding = None

    def do_a(self, attrs):
        """Record the href of an <a> tag, if it has one."""
        href = get('href', attrs)
        if not href:
            # An <a> without an href (for example <a name="...">) defines
            # an anchor rather than a link, so there is nothing to check.
            return
        if self.encoding:
            href = href.decode(self.encoding)
        self.refs.add(urllib.unquote(href))
class get_anchors(sgmllib.SGMLParser, MetaHandler):
    """SGML parser that collects every anchor a link could point at.

    After feeding a document, ``anchors`` holds the set of ``name`` and
    ``id`` values of <a> tags plus the ``id`` values of the tags routed
    to unknown_starttag(), each decoded with the document's declared
    encoding (when one was seen earlier) and with URL %-escapes removed.
    """

    entitydefs = htmlentitydefs.entitydefs

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.anchors = set()
        self.encoding = None

    def _record(self, name):
        """Add *name* to ``anchors`` after decoding and unquoting it."""
        if not name:
            return
        if self.encoding:
            name = name.decode(self.encoding)
        name = urllib.unquote(name)
        if name:
            self.anchors.add(name)

    def unknown_starttag(self, tag, attrs):
        """Record the id attribute of a tag that has no dedicated handler."""
        self._record(get('id', attrs))

    def unknown_endtag(self, tag):
        """End tags carry no anchors; nothing to do."""
        pass

    def do_a(self, attrs):
        """Record the anchors defined by an <a> tag.

        Both ``name`` and ``id`` are recorded, because a link may use
        either one as its fragment.
        """
        self._record(get('name', attrs))
        self._record(get('id', attrs))
# Maps a file name to the set of anchors found in that file, so that each
# linked file is read and parsed at most once.
_anchors = {}


def get_anchors_cached(filename):
    """Return the set of anchors defined in *filename*.

    The file is parsed with get_anchors on first use; later calls for the
    same name return the cached set. Errors from opening or reading the
    file propagate to the caller.
    """
    try:
        return _anchors[filename]
    except KeyError:
        pass

    parser = get_anchors()
    f = open(filename)
    try:
        parser.feed(f.read())
    finally:
        # Close the handle explicitly rather than leaving it to the
        # garbage collector.
        f.close()

    _anchors[filename] = parser.anchors
    return parser.anchors
def resolve_file(src, target):
    """Split a link into the file it points at and its anchor.

    src    -- path of the document that contains the link
    target -- the link text, optionally ending in "#anchor"

    Returns a (path, anchor) tuple. The file part of the link is
    interpreted relative to the directory of *src*; *anchor* is None when
    the link has no "#" part.
    """
    if "#" in target:
        filepart, anchor = target.split("#", 1)
    else:
        filepart, anchor = target, None

    if not filepart:
        # A link with no file part ("#anchor" or "") points into the source
        # document itself. Return src unchanged: joining it onto its own
        # directory would duplicate the directory for relative paths.
        return src, anchor
    return os.path.join(os.path.dirname(src), filepart), anchor
def resolve(target, anchor):
    """Tell whether *anchor* is defined in the file *target*.

    A link without an anchor (None or empty) always resolves; in that
    case the file is not parsed at all.
    """
    return (not anchor) or anchor in get_anchors_cached(target)
# ---- Main script --------------------------------------------------------
# Collect every link in `ref`, classify each one, print a report and exit
# with status 1 if any link is broken or points at an unlisted file.

link_parser = get_refs()
ref_file = open(ref)
try:
    link_parser.feed(ref_file.read())
finally:
    ref_file.close()
refs = link_parser.refs

# Compare target paths in normalised form so that "dir/../dir/x.html" and
# "dir/x.html" are recognised as the same listed file.
if targets:
    listed_targets = set([os.path.normpath(t) for t in targets])
else:
    listed_targets = None

missing_anchor = set()
missing_file = set()
unlisted_targets = set()
good = set()

for r in refs:
    target, anchor = resolve_file(ref, r)
    if (listed_targets is not None
            and os.path.normpath(target) not in listed_targets):
        unlisted_targets.add(target)
    elif not os.path.exists(target):
        missing_file.add(r)
    elif not resolve(target, anchor):
        missing_anchor.add(r)
    else:
        good.add(r)

# print(...) with one parenthesised argument produces the same output as
# the print statement on Python 2.
if missing_file:
    print("Files linked to in %s but could not be found:" % (
        os.path.basename(ref),))
    for i in sorted(missing_file):
        print("\t%r" % i)

if missing_anchor:
    print("Anchors used in %s but not defined in linked file:" % (
        os.path.basename(ref),))
    for i in sorted(missing_anchor):
        print("\t%r" % i)

if unlisted_targets:
    print("Links to files not listed as targets:")
    for i in sorted(unlisted_targets):
        print("\t%r" % i)
    print("If all link targets are not listed in the Submakefile, then the results of this program is unreliable.")

print("Good links: %d/%d" % (len(good), len(refs)))

if missing_anchor or missing_file or unlisted_targets:
    raise SystemExit(1)
|