Last change
on this file was
3152,
checked in by zidane, 13 years ago
|
fixes pep8
|
-
Property svn:eol-style set to
native
|
File size:
619 bytes
|
Rev | Line | |
---|
[1593] | 1 | from xml.parsers.expat import ParserCreate |
---|
| 2 | import re |
---|
| 3 | |
---|
[3152] | 4 | |
---|
def parse(data):
    """Parse an XML document into a summary of its elements and text.

    Args:
        data: The XML document to feed to the expat parser.

    Returns:
        A dict with three keys:
          'start' -- maps each element name to the attribute dict of the
                     last start tag seen with that name;
          'end'   -- always an empty list (kept so the result shape is
                     unchanged for existing callers);
          'data'  -- the non-blank lines of character data, in document
                     order.

    Raises:
        xml.parsers.expat.ExpatError: if the parser rejects the input.
    """
    # Compiled once per call instead of once per character-data callback.
    # Raw string: '\S' is not a valid string escape.
    non_blank = re.compile(r'\S+', re.UNICODE)
    parsed = {'start': {}, 'end': [], 'data': []}
    # Character-data chunks seen since the last element boundary.
    pending = []

    def flush_text():
        # Expat may report one run of text through several callbacks
        # (e.g. "a&amp;b" can arrive as "a", "&", "b").  Join the pieces
        # so an entity-escaped URL is stored whole, then split on
        # newlines so each line is still recorded as a separate entry.
        if not pending:
            return
        text = ''.join(pending)
        del pending[:]
        for line in text.split('\n'):
            if non_blank.search(line):
                parsed['data'].append(line)

    def start_el(name, attrs):
        flush_text()
        parsed['start'][name] = attrs

    def end_el(name):
        # Only needed to mark the end of the current text run.
        flush_text()

    def char_handler(text):
        pending.append(text)

    parser = ParserCreate()
    parser.StartElementHandler = start_el
    parser.EndElementHandler = end_el
    parser.CharacterDataHandler = char_handler
    parser.Parse(data)
    # Emit any text still buffered when parsing stops.
    flush_text()
    return parsed
---|
| 20 | |
---|
[3152] | 21 | |
---|
def hasURL(xml, url):
    """Tell whether ``url`` is one of the text entries found in ``xml``.

    The document is run through ``parse`` and ``url`` is compared for exact
    equality against each entry of the resulting 'data' list.

    Args:
        xml: The XML document (e.g. a sitemap) to search.
        url: The URL string to look for.

    Returns:
        True if ``url`` equals one of the collected text entries,
        otherwise False.
    """
    return url in parse(xml)['data']
---|
Note: See
TracBrowser
for help on using the repository browser.