|
Revision 727
(checked in by crchemist, 2 years ago)
|
Added tests.
|
- Property svn:eol-style set to
native
|
| Line | |
|---|
| 1 |
from xml.parsers.expat import ParserCreate |
|---|
| 2 |
import re |
|---|
| 3 |
|
|---|
| 4 |
def parse(data):
    """Parse an XML document and collect its start tags and text content.

    Returns a dict with three keys:

    * ``'start'`` -- maps each element name to the attribute dict of the
      most recent start tag with that name (an earlier element with the
      same name is overwritten).
    * ``'end'`` -- always an empty list; nothing in this function fills it.
    * ``'data'`` -- the text runs that contain at least one non-whitespace
      character, in document order.

    Raises ``xml.parsers.expat.ExpatError`` if expat rejects the input.
    """
    # Compiled once per call rather than once per text chunk.  Raw string so
    # that ``\S`` reaches the regex engine instead of being read as an
    # (invalid) string escape.
    non_blank = re.compile(r'\S+', re.UNICODE)
    ParsedXML = {'start': {}, 'end': [], 'data': []}

    def start_el(name, attrs):
        ParsedXML['start'][name] = attrs

    def end_el(name):
        # Deliberately a no-op.  With ``buffer_text`` enabled, buffered text
        # is only flushed when a registered handler fires, so an end-tag
        # handler must exist to keep an element's text separate from the
        # whitespace that follows its closing tag.
        pass

    def char_handler(text):
        if non_blank.search(text):
            ParsedXML['data'].append(text)

    parser = ParserCreate()
    # Without buffering, expat reports a single text node in several pieces
    # (e.g. around ``&amp;`` or at line breaks), which breaks exact-match
    # lookups against the collected data such as ``url in data``.
    parser.buffer_text = True
    parser.StartElementHandler = start_el
    parser.EndElementHandler = end_el
    parser.CharacterDataHandler = char_handler
    # NOTE(review): Parse() is called without ``isfinal=True``, so a
    # truncated document is not reported as an error.  Left unchanged to
    # keep existing behaviour for callers passing empty/partial input.
    parser.Parse(data)
    return ParsedXML
|---|
| 19 |
|
|---|
| 20 |
def hasURL(xml, url):
    """Tell whether ``url`` appears among the text data parsed from ``xml``.

    The document is run through ``parse`` and ``url`` is compared for exact
    equality against each collected text entry; substrings do not match.
    """
    return url in parse(xml)['data']
|---|