Context Navigation

source: products/quintagroup.transmogrifier.simpleblog2quills/trunk/quintagroup/transmogrifier/simpleblog2quills/adapters.py @ 1193

Last change on this file since 1193 was 1193, checked in by koval, 15 years ago
fixed bug in rewriting of absolute links in blog entry's text
File size: 13.7 KB

Line
1	import re
2	from xml.dom import minidom
3	from types import ListType
4	from types import TupleType
5
6	from zope.interface import implements, classProvides
7	from zope.app.annotation.interfaces import IAnnotations
8
9	from Products.CMFPlone.Portal import PloneSite
10	from Products.CMFCore import utils
11
12	from collective.transmogrifier.interfaces import ISection, ISectionBlueprint
13	from collective.transmogrifier.utils import defaultMatcher
14
15	from quintagroup.transmogrifier.interfaces import IExportDataCorrector, IImportDataCorrector
16	from quintagroup.transmogrifier.adapters.exporting import ReferenceExporter
17	from quintagroup.transmogrifier.manifest import ManifestExporterSection
18	from quintagroup.transmogrifier.logger import VALIDATIONKEY
19
20	from quintagroup.transmogrifier.simpleblog2quills.interfaces import IExportItemManipulator, IBlog
21
22	# URL of the site, where blog is located (this is needed to fix links in entries)
23	SITE_URLS = []
24	IMAGE_FOLDER = 'images'
25	# these registries are needed to avoid losing images with equal ids
26	IMAGE_IDS = []
27	IMAGE_PATHS = {}
28
class BlogManifest(object):
    """Export data corrector for a blog's manifest XML.

    Keeps only the ``record`` elements typed 'BlogEntry' or 'BlogFolder'
    and appends one 'Large Plone Folder' record named after IMAGE_FOLDER.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """Rewrite the XML stored in ``data['data']`` and return `data`."""
        kept_types = ('BlogEntry', 'BlogFolder')
        doc = minidom.parseString(data['data'])
        root = doc.documentElement

        # decide first, remove afterwards
        unwanted = [record for record in root.getElementsByTagName('record')
                    if record.getAttribute('type') not in kept_types]
        for record in unwanted:
            root.removeChild(record)

        # announce the folder that will hold the blog's images
        image_folder = doc.createElement('record')
        image_folder.setAttribute('type', 'Large Plone Folder')
        image_folder.appendChild(doc.createTextNode(IMAGE_FOLDER))
        root.appendChild(image_folder)

        data['data'] = doc.toxml('utf-8')
        return data
47
class BlogFolderManifest(object):
    """Export data corrector for a blog folder's manifest XML.

    Drops every ``record`` element whose type is neither 'BlogEntry' nor
    'BlogFolder'.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """Filter the XML stored in ``data['data']`` and return `data`."""
        kept_types = ('BlogEntry', 'BlogFolder')
        doc = minidom.parseString(data['data'])
        root = doc.documentElement

        unwanted = [record for record in root.getElementsByTagName('record')
                    if record.getAttribute('type') not in kept_types]
        for record in unwanted:
            root.removeChild(record)

        data['data'] = doc.toxml('utf-8')
        return data
62
class BlogEntryManifest(object):
    """Item manipulator that strips the manifest file from a blog entry."""
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """Delete ``item['_files']['manifest']`` if present; return `item`.

        Content contained in a BlogEntry isn't exported, so its manifest
        is not wanted.
        """
        if '_files' not in item:
            return item
        files = item['_files']
        if 'manifest' in files:
            del files['manifest']
        return item
74
def recurseToInterface(item, ifaces):
    """Walk up the ``aq_parent`` chain of `item` looking for an interface.

    `ifaces` is either a single interface or a list/tuple of interfaces.
    At every step the current object is tested first and its ``aq_parent``
    second; the first object providing any of the interfaces is returned.
    ``None`` is returned once a parent that is a PloneSite instance has
    been tested without a match.
    """
    if isinstance(ifaces, (ListType, TupleType)):
        wanted = ifaces
    else:
        wanted = [ifaces]

    current = item
    while True:
        container = current.aq_parent
        for candidate in (current, container):
            for iface in wanted:
                if iface.providedBy(candidate):
                    return candidate
        if isinstance(container, PloneSite):
            # Stop when we get to the portal root.
            return None
        current = container
92
def getUniqueId(image_id):
    """Return an id based on `image_id` that is absent from IMAGE_IDS.

    An id that is not registered yet is returned unchanged. Otherwise a
    counter (1, 2, ...) is inserted before the last '.'-separated extension
    (or appended when there is no '.') until the result is unused.
    The registry itself is not modified.
    """
    pieces = image_id.rsplit('.', 1)
    if len(pieces) == 2:
        stem = pieces[0]
        suffix = '.' + pieces[1]
    else:
        stem = image_id
        suffix = ''

    if image_id not in IMAGE_IDS:
        return image_id

    counter = 1
    candidate = '%s%d%s' % (stem, counter, suffix)
    while candidate in IMAGE_IDS:
        counter += 1
        candidate = '%s%d%s' % (stem, counter, suffix)
    return candidate
110
class BlogEntryExporter(ReferenceExporter):
    """Export data corrector that rewrites image links in an entry's body.

    After the ReferenceExporter processing, every ``src="..."`` value found
    in the text of the 'body' field is examined. Links to objects located
    inside the enclosing blog are pointed at the blog's IMAGE_FOLDER; other
    links into the portal are rewritten as '..'-relative paths.
    """
    implements(IExportDataCorrector)

    # captures the value of every double-quoted src attribute
    SRC = re.compile(r'src="([^"]+)"')

    def __init__(self, context):
        self.context = context
        self.portal_url = utils.getToolByName(self.context, 'portal_url')
        self.portal = self.portal_url.getPortalObject()

    def __call__(self, data):
        """Rewrite links in the XML held in ``data['data']``; return `data`.

        `data` is returned without link rewriting when the XML has no
        non-empty 'body' field or when the context is not inside a blog.
        """
        data = super(BlogEntryExporter, self).__call__(data)
        doc = minidom.parseString(data['data'])
        body_fields = [field for field in doc.getElementsByTagName('field')
                       if field.getAttribute('name') == 'body']
        if not body_fields or body_fields[0].firstChild is None:
            # no body text at all - nothing to rewrite
            return data
        body = body_fields[0].firstChild

        blog = recurseToInterface(self.context, IBlog)
        if blog is None:
            # entry lives outside of any blog, paths can't be computed
            return data

        text = body.nodeValue
        blog_path = blog.getPhysicalPath()
        context_path = self.context.getPhysicalPath()

        for url in self.SRC.findall(text):
            url = str(url)
            if url.startswith('http://'):
                new_url = self._rewriteSiteUrl(url, blog_path, context_path)
            elif '://' in url:
                # skip links with illegal url schema
                continue
            else:
                new_url = self._rewriteLocalUrl(url, blog_path, context_path)
            if new_url is not None:
                text = text.replace(url, new_url, 1)

        body.nodeValue = text
        data['data'] = doc.toxml('utf-8')
        return data

    def _parentSteps(self, level):
        """Return `level` '..' segments joined with '/' ('' if level < 1)."""
        return '/'.join(['..'] * level)

    def _blogImageUrl(self, image, image_id, blog_path, context_path):
        """Register `image` and return its link inside IMAGE_FOLDER."""
        image_id = self.fixImageId(image, image_id, blog_path)
        level = len(context_path) - len(blog_path) - 1
        # NOTE(review): with level == 0 the prefix is empty and the result
        # starts with '/'; confirm that this is what the importer expects.
        return '/'.join((self._parentSteps(level), IMAGE_FOLDER, image_id))

    def _portalPrefix(self, context_path):
        """Return the '..' prefix leading from the context to the portal.

        Uses ``len(context_path) - 3`` levels - presumably this assumes
        the portal sits directly under the Zope root; verify if it doesn't.
        """
        return self._parentSteps(len(context_path) - 3)

    def _rewriteSiteUrl(self, url, blog_path, context_path):
        """Convert an absolute 'http://' link to a relative one.

        Only the first entry of SITE_URLS that prefixes `url` is tried.
        Returns the new link, or None when no site matches or when
        traversing to the target raises AttributeError (broken link).
        """
        for site in SITE_URLS:
            if not url.startswith(site):
                continue
            relative_url = url[len(site):].strip('/')
            # if link is broken we'll get an AttributeError
            try:
                image = self.portal.unrestrictedTraverse(relative_url)
            except AttributeError:
                return None
            if recurseToInterface(image, IBlog) is not None:
                image_id = url.rsplit('/', 1)[-1]
                return self._blogImageUrl(image, image_id, blog_path,
                                          context_path)
            return self._portalPrefix(context_path) + '/' + relative_url
        return None

    def _rewriteLocalUrl(self, url, blog_path, context_path):
        """Rewrite a schema-less link.

        Links starting with '/' are resolved from the portal, all others
        from the context. Returns the new link, or None when the target
        can't be traversed or the link needs no change.
        """
        if url.startswith('/'):
            base, target = self.portal, url.strip('/')
        else:
            base, target = self.context, url
        # if link is broken we'll get an AttributeError
        try:
            image = base.unrestrictedTraverse(target)
        except AttributeError:
            return None

        if recurseToInterface(image, IBlog) is not None:
            image_id = url.rsplit('/', 1)[-1]
            return self._blogImageUrl(image, image_id, blog_path,
                                      context_path)
        if url.startswith('../'):
            # remove '../' from the start of string
            return url[3:]
        if url.startswith('/'):
            # these links didn't work so rewrite them with '..'
            return self._portalPrefix(context_path) + url
        return None

    def fixImageId(self, image, image_id, blog_path):
        """Return the id under which `image` is stored in IMAGE_FOLDER.

        An image whose physical path is already in IMAGE_PATHS keeps the
        id it was registered with, so repeated references to one image
        always get the same link. Otherwise an id not yet present in
        IMAGE_IDS is chosen and both registries are updated.
        """
        image_path = '/'.join(image.getPhysicalPath())
        if image_path in IMAGE_PATHS:
            # reuse the (possibly renamed) id assigned earlier
            return IMAGE_PATHS[image_path].rsplit('/', 1)[-1]

        # getUniqueId leaves an unregistered id unchanged
        image_id = getUniqueId(image_id)
        IMAGE_IDS.append(image_id)
        IMAGE_PATHS[image_path] = '/'.join(
            blog_path[2:] + (IMAGE_FOLDER, image_id))
        return image_id
213
class PathRewriter(object):
    """Item manipulator that moves an exported object into IMAGE_FOLDER.

    The item's path is replaced by ``<blog path>/<IMAGE_FOLDER>/<id>``,
    where the id is made unique among IMAGE_IDS; the previous path is
    kept under the '_oldpath' key.
    """
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """Rewrite the path of `item` and return it.

        ``kw['path']`` names the item key holding the path. The item is
        returned untouched when that keyword is missing or when the
        context is not located inside a blog.
        """
        pathkey = kw.get('path')
        if pathkey is None:
            return item

        path = item[pathkey]
        blog = recurseToInterface(self.context, IBlog)
        if blog is None:
            return item

        full_path = '/'.join(self.context.getPhysicalPath())
        image_id = path.rsplit('/', 1)[-1]

        if full_path in IMAGE_PATHS:
            # the object was registered earlier, maybe under a renamed id
            new_path = IMAGE_PATHS[full_path]
            unique_id = new_path.rsplit('/', 1)[-1]
        else:
            unique_id = getUniqueId(image_id)
            blog_path = blog.getPhysicalPath()
            new_path = '/'.join(blog_path[2:] + (IMAGE_FOLDER, unique_id))
            # register the id that is really used, so that it isn't
            # handed out again
            IMAGE_IDS.append(unique_id)
            IMAGE_PATHS[full_path] = new_path

        # change item's path
        item[pathkey] = new_path
        item['_oldpath'] = path

        # now we need to fix object id in .marshall.xml
        if image_id != unique_id:
            self._fixMarshallId(item, unique_id)

        return item

    def _fixMarshallId(self, item, unique_id):
        """Store `unique_id` in the 'id' field of the item's marshall XML.

        Nothing happens when the item carries no marshall file or the XML
        has no non-empty 'id' field.
        """
        if '_files' not in item or 'marshall' not in item['_files']:
            return
        marshall = item['_files']['marshall']
        doc = minidom.parseString(marshall['data'])
        id_fields = [field for field in doc.getElementsByTagName('field')
                     if field.getAttribute('name') == 'id']
        if not id_fields or id_fields[0].firstChild is None:
            return
        id_fields[0].firstChild.nodeValue = '\n\t\t%s\n\t' % unique_id
        marshall['data'] = doc.toxml('utf-8')
258
class ImageFolderSection(object):
    """ This section will generate manifest files for image folders in blog.

    Items from the previous section are passed through unchanged. For the
    ones carrying path, type and old-path keys the new location is
    remembered; afterwards one manifest item per collected folder is
    produced by a ManifestExporterSection and yielded too.
    """
    classProvides(ISectionBlueprint)
    implements(ISection)

    def __init__(self, transmogrifier, name, options, previous):
        self.previous = previous
        self.transmogrifier = transmogrifier

        self.flagkey = defaultMatcher(options, 'old-path-key', name, 'oldpath')
        self.typekey = defaultMatcher(options, 'type-key', name, 'type')
        self.pathkey = defaultMatcher(options, 'path-key', name, 'path')

        # 'site-urls' option: one URL per line, blank lines are ignored
        site_urls = options.get('site-urls', '')
        for line in site_urls.splitlines():
            line = line.strip()
            if line:
                SITE_URLS.append(line)

        self.anno = IAnnotations(transmogrifier)

    def __iter__(self):
        # folder path -> [(image id, type), ...]
        folders = {}
        # folder path -> item key under which paths of that folder's
        # items were found (reused for the generated manifest item)
        folder_pathkeys = {}

        # safely get logging storage
        if VALIDATIONKEY in self.anno:
            log_storage = self.anno[VALIDATIONKEY]
        else:
            log_storage = None

        for item in self.previous:
            item_keys = item.keys()
            pathkey = self.pathkey(*item_keys)[0]
            typekey = self.typekey(*item_keys)[0]
            oldpathkey = self.flagkey(*item_keys)[0]

            # collect data about images moved to folders
            if pathkey and typekey and oldpathkey:
                path = item[pathkey]
                old_path = item[oldpathkey]
                type_ = item[typekey]
                folder_path, image_id = path.rsplit('/', 1)
                folders.setdefault(folder_path, []).append((image_id, type_))
                folder_pathkeys[folder_path] = pathkey

                # update logging data (path) for this item
                if log_storage and log_storage[-1] == old_path:
                    log_storage.pop()
                    log_storage.append(path)

            yield item

        # generate manifests for those image folders; the path key recorded
        # for each folder is used, not whatever key the last item had
        items = []
        for folder, entries in folders.items():
            items.append({'_entries': entries,
                          folder_pathkeys[folder]: folder})
        exporter = ManifestExporterSection(
            self.transmogrifier, 'manifest', {'blueprint': 'manifest'},
            iter(items))
        for item in exporter:
            yield item

        # clean registries (in place - other code holds references to them)
        del IMAGE_IDS[:]
        del SITE_URLS[:]
        IMAGE_PATHS.clear()
323
class WorkflowImporter(object):
    """ This adapter tries to convert all possible workflow histories to
        simple_publication_workflow history.
    """
    implements(IImportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """Convert the first ``cmf:workflow`` element in ``data['data']``.

        `data` is returned unchanged when there is no such element or its
        id already is 'simple_publication_workflow'. Otherwise the id is
        replaced, review states are remapped and the XML is re-serialized.
        """
        doc = minidom.parseString(data['data'])
        workflows = doc.getElementsByTagName('cmf:workflow')
        if not workflows:
            # we don't have such workflow history
            return data

        wh = workflows[0]
        old_id = wh.getAttribute('id')
        if old_id != 'simple_publication_workflow':
            wh.setAttribute('id', 'simple_publication_workflow')
            if old_id == 'simpleblog_workflow':
                self.fixSimpleBlogWorkflow(wh)
            else:
                self.fixWorkflow(wh)
            data['data'] = doc.toxml('utf-8')

        return data

    def _replaceReviewState(self, wh, old_state, new_state):
        """Change review_state vars from `old_state` to `new_state`.

        Only ``cmf:var`` elements found below ``cmf:history`` elements of
        `wh` are touched.
        """
        for history in wh.getElementsByTagName('cmf:history'):
            for var in history.getElementsByTagName('cmf:var'):
                if var.getAttribute('id') != 'review_state':
                    continue
                if var.getAttribute('value') == old_state:
                    var.setAttribute('value', new_state)

    def fixSimpleBlogWorkflow(self, wh):
        """Map the 'draft' review state to 'private'."""
        self._replaceReviewState(wh, 'draft', 'private')

    def fixWorkflow(self, wh):
        """Map the 'visible' review state to 'published'."""
        self._replaceReviewState(wh, 'visible', 'published')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: