Context Navigation

adapters.py @ 1241

Last change on this file since 1241 was 1241, checked in by koval, 15 years ago
manifest datacorrector for Blog content type now checks for existence of folder with images
File size: 14.0 KB

Line
1	import re
2	from xml.dom import minidom
3	from types import ListType
4	from types import TupleType
5
6	from zope.interface import implements, classProvides
7	from zope.app.annotation.interfaces import IAnnotations
8
9	from Products.CMFPlone.Portal import PloneSite
10	from Products.CMFCore import utils
11
12	from collective.transmogrifier.interfaces import ISection, ISectionBlueprint
13	from collective.transmogrifier.utils import defaultMatcher
14
15	from quintagroup.transmogrifier.interfaces import IExportDataCorrector, IImportDataCorrector
16	from quintagroup.transmogrifier.adapters.exporting import ReferenceExporter
17	from quintagroup.transmogrifier.manifest import ManifestExporterSection
18	from quintagroup.transmogrifier.logger import VALIDATIONKEY
19
20	from quintagroup.transmogrifier.simpleblog2quills.interfaces import IExportItemManipulator, IBlog
21
# URLs of the site where the blog is located (needed to fix links in entries).
# Filled from the 'site-urls' option by ImageFolderSection and emptied again
# when that section finishes iterating.
SITE_URLS = []
# id and portal type of the folder that collects all images of a blog
IMAGE_FOLDER = 'images'
IMAGE_FOLDER_TYPE = 'Large Plone Folder'
# These registries are needed to avoid losing images with equal ids.
# IMAGE_IDS holds the ids already handed out inside the image folder;
# IMAGE_PATHS maps an image's original '/'-joined physical path to its new
# path inside the image folder.
IMAGE_IDS = []
IMAGE_PATHS = {}
29
class BlogManifest(object):
    """Export data corrector that filters the manifest of a blog.

    Only ``BlogEntry`` and ``BlogFolder`` records are kept.  A record for
    the image folder (``IMAGE_FOLDER`` / ``IMAGE_FOLDER_TYPE``) is appended
    unless one of the kept records already uses that id.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """Rewrite the manifest XML stored under ``data['data']``.

        The same ``data`` mapping is returned, with ``data['data']``
        replaced by the re-serialized (UTF-8) manifest.
        """
        # flag that indicates whether the 'images' folder must be added
        # to the manifest
        need_to_add = True

        doc = minidom.parseString(data['data'])
        root = doc.documentElement
        for child in root.getElementsByTagName('record'):
            if child.getAttribute('type') not in ('BlogEntry', 'BlogFolder'):
                root.removeChild(child)
                continue
            text_node = child.firstChild
            if text_node is None or text_node.nodeValue is None:
                # record without a text id: it cannot clash with the
                # image folder, and must not crash the export
                continue
            # compare without str(): coercing a non-ASCII unicode id to
            # str raises UnicodeEncodeError on Python 2
            if text_node.nodeValue.strip() == IMAGE_FOLDER:
                # blog already contains an object with the IMAGE_FOLDER id
                need_to_add = False

        if need_to_add:
            folder = doc.createElement('record')
            folder.setAttribute('type', IMAGE_FOLDER_TYPE)
            folder.appendChild(doc.createTextNode(IMAGE_FOLDER))
            root.appendChild(folder)

        data['data'] = doc.toxml('utf-8')
        return data
57
class BlogFolderManifest(object):
    """Export data corrector that filters the manifest of a blog folder.

    Every ``record`` whose type is neither ``BlogEntry`` nor ``BlogFolder``
    is dropped from the manifest.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """Strip unwanted records from ``data['data']`` and return ``data``."""
        kept_types = ('BlogEntry', 'BlogFolder')
        document = minidom.parseString(data['data'])
        manifest = document.documentElement

        # collect first, then remove
        unwanted = [record
                    for record in manifest.getElementsByTagName('record')
                    if record.getAttribute('type') not in kept_types]
        for record in unwanted:
            manifest.removeChild(record)

        data['data'] = document.toxml('utf-8')
        return data
72
class BlogEntryManifest(object):
    """Item manipulator that drops the manifest of a blog entry."""
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """Return ``item`` with ``item['_files']['manifest']`` removed.

        Content contained in a BlogEntry isn't exported, so its manifest
        data is discarded.
        """
        if '_files' not in item:
            return item
        files = item['_files']
        if 'manifest' in files:
            del files['manifest']
        return item
84
def recurseToInterface(item, ifaces):
    """Walk up the aq_parent chain until an object providing one of
    `ifaces` is found, and return that object.

    `ifaces` may be a single interface or a list/tuple of interfaces.
    `item` itself is checked first, then each parent in turn.  Returns
    None when the portal root (a PloneSite) is reached without a match,
    or when the chain ends because an object has no `aq_parent`.
    """
    # builtin types instead of types.ListType/TupleType: identical on
    # Python 2 and still available on Python 3
    if not isinstance(ifaces, (list, tuple)):
        ifaces = [ifaces]

    while True:
        for iface in ifaces:
            if iface.providedBy(item):
                return item

        parent = getattr(item, 'aq_parent', None)
        if parent is None:
            # top of the chain reached without meeting the portal root
            return None

        for iface in ifaces:
            if iface.providedBy(parent):
                return parent
        if isinstance(parent, PloneSite):
            # Stop when we get to the portal root.
            return None
        item = parent
102
def getUniqueId(image_id):
    """ Return an id that is not yet present in the IMAGE_IDS registry.

    An unused `image_id` is returned unchanged.  Otherwise a counter
    (1, 2, ...) is inserted before the last '.' of the id (or appended
    when there is no '.') until the result is unused.
    """
    if image_id not in IMAGE_IDS:
        return image_id

    # split into "stem" and ".extension" (extension keeps its dot)
    dot = image_id.rfind('.')
    if dot >= 0:
        stem, suffix = image_id[:dot], image_id[dot:]
    else:
        stem, suffix = image_id, ''

    counter = 1
    while True:
        candidate = stem + str(counter) + suffix
        if candidate not in IMAGE_IDS:
            return candidate
        counter += 1
120
class BlogEntryExporter(ReferenceExporter):
    """Export data corrector that rewrites image links of a blog entry.

    Every ``src="..."`` URL found in the marshalled ``body`` field is
    resolved by traversal.  Links to objects stored inside the blog are
    pointed at the blog's ``IMAGE_FOLDER``; other resolvable links are
    rewritten relative to the entry.  Broken links and links with an
    unsupported URL scheme are left untouched.
    """
    implements(IExportDataCorrector)

    SRC = re.compile(r'src="([^"]+)"')

    # exceptions that signal a broken link during traversal
    _BROKEN_LINK = (AttributeError, KeyError)

    def __init__(self, context):
        self.context = context
        self.portal_url = utils.getToolByName(self.context, 'portal_url')
        self.portal = self.portal_url.getPortalObject()

    def __call__(self, data):
        """Rewrite the links in ``data['data']`` and return ``data``.

        ``data`` is returned as produced by the parent exporter when there
        is no ``body`` field, the body is empty, or the entry is not
        located inside a blog.
        """
        data = super(BlogEntryExporter, self).__call__(data)
        doc = minidom.parseString(data['data'])
        bodies = [field for field in doc.getElementsByTagName('field')
                  if field.getAttribute('name') == 'body']
        if not bodies:
            return data
        elem = bodies[0]
        if elem.firstChild is None or elem.firstChild.nodeValue is None:
            # empty body: there is nothing to rewrite
            return data

        blog = recurseToInterface(self.context, IBlog)
        if blog is None:
            # entry is not inside a blog, so there is no image folder
            # to point the links at
            return data

        text = elem.firstChild.nodeValue
        blog_path = blog.getPhysicalPath()
        context_path = self.context.getPhysicalPath()
        for url in self.SRC.findall(text):
            url = str(url)
            new_url = self._rewriteUrl(url, blog_path, context_path)
            if new_url is not None:
                text = text.replace(url, new_url, 1)

        elem.firstChild.nodeValue = text
        data['data'] = doc.toxml('utf-8')
        return data

    def _rewriteUrl(self, url, blog_path, context_path):
        """ Return the replacement for `url`, or None to leave it as is.
        """
        # skip links with illegal url schema
        if '://' in url and not url.startswith('http://'):
            return None

        image_id = url.rsplit('/', 1)[-1]

        # convert all absolute links to relative ones
        if url.startswith('http://'):
            for site in SITE_URLS:
                if not url.startswith(site):
                    continue
                relative_url = url[len(site):].strip('/')
                try:
                    image = self.portal.unrestrictedTraverse(relative_url)
                except self._BROKEN_LINK:
                    return None
                # check whether image is stored in blog
                if recurseToInterface(image, IBlog) is not None:
                    return self._blogImageUrl(image, image_id, blog_path,
                                              context_path)
                return self._toPortalRoot(context_path) + '/' + relative_url
            # url doesn't belong to any of the configured sites
            return None

        try:
            if url.startswith('/'):
                image = self.portal.unrestrictedTraverse(url.strip('/'))
            else:
                image = self.context.unrestrictedTraverse(url)
        except self._BROKEN_LINK:
            return None

        if recurseToInterface(image, IBlog) is not None:
            return self._blogImageUrl(image, image_id, blog_path, context_path)
        if url.startswith('../'):
            # remove '../' from the start of string
            return url[3:]
        if url.startswith('/'):
            # these links didn't work so rewrite them with '..'
            return self._toPortalRoot(context_path) + url
        return None

    def _upLevels(self, level):
        """ Return `level` '..' segments joined with '/' ('' if level <= 0).
        """
        return '/'.join(['..'] * level)

    def _toPortalRoot(self, context_path):
        """ Return the '..' prefix leading from the entry to the portal root.
        """
        # find how many levels self.context is under portal root
        return self._upLevels(len(context_path) - 3)

    def _blogImageUrl(self, image, image_id, blog_path, context_path):
        """ Return the link to `image` inside the blog's image folder.
        """
        image_id = self.fixImageId(image, image_id, blog_path)
        level = len(context_path) - len(blog_path) - 1
        # NOTE(review): when level is 0 the prefix is empty and the result
        # starts with '/' -- confirm that this is intended
        return '/'.join((self._upLevels(level), IMAGE_FOLDER, image_id))

    def fixImageId(self, image, image_id, blog_path):
        """ Return the id `image` gets inside the image folder.

        An image that is already registered in IMAGE_PATHS keeps the id it
        was given before.  Otherwise `image_id` is made unique (if needed),
        and the image is registered in IMAGE_IDS and IMAGE_PATHS.
        """
        image_path = '/'.join(image.getPhysicalPath())
        if image_path in IMAGE_PATHS:
            # reuse the id assigned earlier; it may differ from `image_id`
            # when the image had to be renamed
            return IMAGE_PATHS[image_path].rsplit('/', 1)[-1]

        if image_id in IMAGE_IDS:
            image_id = getUniqueId(image_id)
        IMAGE_IDS.append(image_id)
        IMAGE_PATHS[image_path] = '/'.join(blog_path[2:] + (IMAGE_FOLDER, image_id))
        return image_id
223
class PathRewriter(object):
    """Item manipulator that moves an item into the blog's image folder.

    The item's path is replaced by a path inside ``IMAGE_FOLDER`` of the
    enclosing blog, and the original path is stored under ``'_oldpath'``.
    """
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """Rewrite the path of ``item`` and return it.

        ``kw['path']`` names the key of ``item`` that holds the path.  The
        item is returned untouched when that keyword is missing or when
        the context is not located inside a blog.
        """
        pathkey = kw.get('path')
        if pathkey is None:
            return item

        path = item[pathkey]
        blog = recurseToInterface(self.context, IBlog)
        if blog is None:
            return item

        blog_path = blog.getPhysicalPath()
        full_path = '/'.join(self.context.getPhysicalPath())
        image_id = path.rsplit('/', 1)[-1]

        if full_path in IMAGE_PATHS:
            # already registered (possibly under a renamed id)
            new_path = IMAGE_PATHS[full_path]
            unique_id = new_path.rsplit('/', 1)[-1]
        else:
            unique_id = getUniqueId(image_id)
            new_path = '/'.join(blog_path[2:] + (IMAGE_FOLDER, unique_id))
            # register the id that is really used, otherwise the next
            # clash would generate the same "unique" id again
            IMAGE_IDS.append(unique_id)
            IMAGE_PATHS[full_path] = new_path

        # change item's path
        item[pathkey] = new_path
        item['_oldpath'] = path

        # now we need to fix object id in .marshall.xml
        if unique_id != image_id:
            self._fixMarshalledId(item, unique_id)

        return item

    def _fixMarshalledId(self, item, new_id):
        """ Write `new_id` into the 'id' field of the item's marshall data.
        """
        if '_files' not in item or 'marshall' not in item['_files']:
            return
        marshall = item['_files']['marshall']
        doc = minidom.parseString(marshall['data'])
        id_fields = [field for field in doc.getElementsByTagName('field')
                     if field.getAttribute('name') == 'id']
        if not id_fields or id_fields[0].firstChild is None:
            # nothing to fix
            return
        id_fields[0].firstChild.nodeValue = '\n\t\t%s\n\t' % new_id
        marshall['data'] = doc.toxml('utf-8')
268
class ImageFolderSection(object):
    """ This section will generate manifest files for image folders in blog.

    Items coming from the previous section are passed through unchanged.
    For items that carry a path, a type and an old path, the folder part
    of the path is remembered; after the last item one manifest item per
    remembered folder is produced by a ManifestExporterSection.  Finally
    the module-level registries are emptied.
    """
    classProvides(ISectionBlueprint)
    implements(ISection)

    def __init__(self, transmogrifier, name, options, previous):
        self.previous = previous
        self.transmogrifier = transmogrifier

        self.flagkey = defaultMatcher(options, 'old-path-key', name, 'oldpath')
        self.typekey = defaultMatcher(options, 'type-key', name, 'type')
        self.pathkey = defaultMatcher(options, 'path-key', name, 'path')

        # 'site-urls' holds one url per line; blank lines are ignored
        site_urls = options.get('site-urls', '')
        site_urls = filter(None, [i.strip() for i in site_urls.splitlines()])
        for i in site_urls:
            SITE_URLS.append(i)

        self.anno = IAnnotations(transmogrifier)

    def __iter__(self):
        # folder path -> (path key of its items, [(image id, type), ...])
        folders = {}

        # safely get logging storage
        if VALIDATIONKEY in self.anno:
            log_storage = self.anno[VALIDATIONKEY]
        else:
            log_storage = None

        for item in self.previous:
            item_keys = item.keys()
            pathkey = self.pathkey(*item_keys)[0]
            typekey = self.typekey(*item_keys)[0]
            oldpathkey = self.flagkey(*item_keys)[0]

            # collect data about images moved to folders
            if pathkey and typekey and oldpathkey:
                path = item[pathkey]
                old_path = item[oldpathkey]
                type_ = item[typekey]
                folder_path, image_id = path.rsplit('/', 1)
                # remember the path key together with the folder: the key
                # matched for a later, unrelated item may differ or be None
                entries = folders.setdefault(folder_path, (pathkey, []))[1]
                entries.append((image_id, type_))

                # update logging data (path) for this item
                if log_storage and log_storage[-1] == old_path:
                    log_storage.pop()
                    log_storage.append(path)

            yield item

        # generate manifests for those image folders
        items = []
        for folder, (folder_pathkey, entries) in folders.items():
            items.append({'_entries': entries, folder_pathkey: folder})
        exporter = ManifestExporterSection(self.transmogrifier, 'manifest',
                                           {'blueprint': 'manifest'},
                                           iter(items))
        for item in exporter:
            yield item

        # clean registries (in place, other code holds references to them)
        del IMAGE_IDS[:]
        del SITE_URLS[:]
        IMAGE_PATHS.clear()
333
class WorkflowImporter(object):
    """ This adapter tries to convert all possible workflow histories to
        simple_publication_workflow history.
    """
    implements(IImportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """Retarget the first ``cmf:workflow`` element of ``data['data']``.

        ``data`` is returned untouched when there is no such element or
        when it already belongs to simple_publication_workflow.
        """
        document = minidom.parseString(data['data'])
        workflows = document.getElementsByTagName('cmf:workflow')
        if not workflows:
            # we don't have such workflow history
            return data

        workflow = workflows[0]
        old_id = workflow.getAttribute('id')
        if old_id == 'simple_publication_workflow':
            return data

        workflow.setAttribute('id', 'simple_publication_workflow')
        if old_id == 'simpleblog_workflow':
            self.fixSimpleBlogWorkflow(workflow)
        else:
            self.fixWorkflow(workflow)

        data['data'] = document.toxml('utf-8')
        return data

    def fixSimpleBlogWorkflow(self, wh):
        """Turn every 'draft' review state into 'private'."""
        self._replaceReviewState(wh, 'draft', 'private')

    def fixWorkflow(self, wh):
        """Turn every 'visible' review state into 'published'."""
        self._replaceReviewState(wh, 'visible', 'published')

    def _replaceReviewState(self, wh, old_state, new_state):
        """Replace `old_state` by `new_state` in all review_state vars."""
        for history in wh.getElementsByTagName('cmf:history'):
            for var in history.getElementsByTagName('cmf:var'):
                if var.getAttribute('id') != 'review_state':
                    continue
                if var.getAttribute('value') == old_state:
                    var.setAttribute('value', new_state)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: products/quintagroup.transmogrifier.simpleblog2quills/trunk/quintagroup/transmogrifier/simpleblog2quills/adapters.py @ 1241

Download in other formats: