Context Navigation

source: products/quintagroup.transmogrifier.simpleblog2quills/trunk/quintagroup/transmogrifier/simpleblog2quills/adapters.py @ 1193

Last change on this file since 1193 was 1193, checked in by koval, 15 years ago
fixed bug in rewriting of absolute links in blog entry's text
File size: 13.7 KB

Rev	Line
[532]	1	import re
	2	from xml.dom import minidom
	3	from types import ListType
	4	from types import TupleType
	5
[533]	6	from zope.interface import implements, classProvides
[554]	7	from zope.app.annotation.interfaces import IAnnotations
	8
[532]	9	from Products.CMFPlone.Portal import PloneSite
[533]	10	from Products.CMFCore import utils
[532]	11
[533]	12	from collective.transmogrifier.interfaces import ISection, ISectionBlueprint
	13	from collective.transmogrifier.utils import defaultMatcher
	14
[612]	15	from quintagroup.transmogrifier.interfaces import IExportDataCorrector, IImportDataCorrector
[532]	16	from quintagroup.transmogrifier.adapters.exporting import ReferenceExporter
[533]	17	from quintagroup.transmogrifier.manifest import ManifestExporterSection
[554]	18	from quintagroup.transmogrifier.logger import VALIDATIONKEY
[532]	19
	20	from quintagroup.transmogrifier.simpleblog2quills.interfaces import IExportItemManipulator, IBlog
	21
# URLs of the sites where the blog is located (needed to fix absolute links
# in entries); filled from the 'site-urls' option by ImageFolderSection
SITE_URLS = []
# id of the folder (inside the blog) that exported images are moved to
IMAGE_FOLDER = 'images'
# these registries are needed to avoid losing images with equal ids:
# IMAGE_IDS holds the image ids registered so far, IMAGE_PATHS maps the
# '/'-joined physical path of an image to its new export path
IMAGE_IDS = []
IMAGE_PATHS = {}
[532]	28
class BlogManifest(object):
    """Export data corrector for the manifest of a blog.

    Keeps only the 'BlogEntry' and 'BlogFolder' records of the manifest and
    appends one 'Large Plone Folder' record named IMAGE_FOLDER.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """Rewrite the manifest XML stored in ``data['data']`` and return ``data``."""
        document = minidom.parseString(data['data'])
        manifest = document.documentElement

        # drop every record that is neither a blog entry nor a blog folder
        kept_types = ('BlogEntry', 'BlogFolder')
        for record in manifest.getElementsByTagName('record'):
            if record.getAttribute('type') in kept_types:
                continue
            manifest.removeChild(record)

        # register the folder that will hold the images
        image_folder = document.createElement('record')
        image_folder.setAttribute('type', 'Large Plone Folder')
        image_folder.appendChild(document.createTextNode(IMAGE_FOLDER))
        manifest.appendChild(image_folder)

        data['data'] = document.toxml('utf-8')
        return data
	47
class BlogFolderManifest(object):
    """Export data corrector for the manifest of a blog folder.

    Keeps only the 'BlogEntry' and 'BlogFolder' records of the manifest.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """Filter the manifest XML stored in ``data['data']`` and return ``data``."""
        document = minidom.parseString(data['data'])
        manifest = document.documentElement

        kept_types = ('BlogEntry', 'BlogFolder')
        for record in manifest.getElementsByTagName('record'):
            if record.getAttribute('type') in kept_types:
                continue
            manifest.removeChild(record)

        data['data'] = document.toxml('utf-8')
        return data
	62
class BlogEntryManifest(object):
    """Item manipulator that strips the manifest from an exported item."""
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """Delete ``item['_files']['manifest']`` when present; return ``item``."""
        # content contained in BlogEntry isn't exported, so no manifest is needed
        if '_files' not in item:
            return item
        files = item['_files']
        if 'manifest' in files:
            del files['manifest']
        return item
	74
def recurseToInterface(item, ifaces):
    """Find the nearest object in the acquisition chain providing an interface.

    ``ifaces`` is a single interface or a list/tuple of interfaces; an
    object matches when any of them reports ``providedBy(obj)``.

    ``item`` itself is checked first, then its ``aq_parent``, that parent's
    ``aq_parent`` and so on.  The walk ends once a ``PloneSite`` parent has
    been checked, or when an object has no ``aq_parent``.

    Returns the first matching object, or None if nothing matches.
    """
    if not isinstance(ifaces, (list, tuple)):
        ifaces = [ifaces]

    def provides(obj):
        for iface in ifaces:
            if iface.providedBy(obj):
                return True
        return False

    # check the item before touching aq_parent, so that an object without
    # an acquisition parent can still be returned when it matches
    if provides(item):
        return item

    current = item
    while True:
        parent = getattr(current, 'aq_parent', None)
        if parent is None:
            # top of the acquisition chain reached without a match
            return None
        if provides(parent):
            return parent
        if isinstance(parent, PloneSite):
            # Stop when we get to the portal root.
            return None
        current = parent
	92
def getUniqueId(image_id):
    """ Return an id that is not present in the IMAGE_IDS registry.

    An unregistered ``image_id`` is returned unchanged.  Otherwise a counter
    (1, 2, 3, ...) is inserted before the last '.' of the id (or appended
    when the id has no '.') until the result is not in IMAGE_IDS.
    """
    if image_id not in IMAGE_IDS:
        return image_id

    pieces = image_id.rsplit('.', 1)
    if len(pieces) == 2:
        stem, suffix = pieces[0], '.' + pieces[1]
    else:
        stem, suffix = image_id, ''

    counter = 0
    while True:
        counter += 1
        candidate = stem + str(counter) + suffix
        if candidate not in IMAGE_IDS:
            return candidate
	110
class BlogEntryExporter(ReferenceExporter):
    """Export data corrector that rewrites ``src`` links in a blog entry's body.

    After the ReferenceExporter processing, every ``src="..."`` URL found in
    the 'body' field is examined:

    * URLs with a scheme other than ``http://`` are left alone;
    * ``http://`` URLs are handled only when they start with one of
      SITE_URLS; the rest of the URL is traversed from the portal;
    * URLs starting with '/' are traversed from the portal, all others from
      the context;
    * URLs that cannot be traversed (broken links) are left alone.

    A link whose target lives inside a blog is pointed at the blog's
    IMAGE_FOLDER; other links are made relative as described in
    ``_rewriteUrl``.
    """
    implements(IExportDataCorrector)

    # matches src="..." attributes; group 1 is the URL
    SRC = re.compile(r'src="([^"]+)"')

    def __init__(self, context):
        self.context = context
        self.portal_url = utils.getToolByName(self.context, 'portal_url')
        self.portal = self.portal_url.getPortalObject()

    def __call__(self, data):
        """Rewrite links in the XML stored in ``data['data']``; return ``data``.

        ``data`` is returned without link rewriting when there is no 'body'
        field, when that field has no text, or when the context is not
        located inside a blog.
        """
        data = super(BlogEntryExporter, self).__call__(data)
        doc = minidom.parseString(data['data'])
        body_fields = [field for field in doc.getElementsByTagName('field')
                       if field.getAttribute('name') == 'body']
        if not body_fields:
            return data
        text_node = body_fields[0].firstChild
        if text_node is None or text_node.nodeValue is None:
            # empty body - nothing to rewrite
            return data

        blog = recurseToInterface(self.context, IBlog)
        if blog is None:
            # relative links can't be computed without the enclosing blog
            return data
        blog_path = blog.getPhysicalPath()
        context_path = self.context.getPhysicalPath()

        def rewrite(match):
            # Called once per src attribute, so each occurrence is rewritten
            # exactly once and text outside of src attributes is not touched.
            new_url = self._rewriteUrl(str(match.group(1)), blog_path,
                                       context_path)
            if new_url is None:
                return match.group(0)
            return 'src="%s"' % new_url

        text_node.nodeValue = self.SRC.sub(rewrite, text_node.nodeValue)
        data['data'] = doc.toxml('utf-8')
        return data

    def _relativeUrl(self, level, *segments):
        """Join ``level`` '..' steps and ``segments`` into a relative URL."""
        # building one list avoids a leading '/' when level is 0
        return '/'.join(['..'] * level + list(segments))

    def _blogImageUrl(self, image, image_id, blog_path, context_path):
        """Register ``image`` and return its URL inside the blog's image folder."""
        image_id = self.fixImageId(image, image_id, blog_path)
        # number of containers between the blog and the context
        level = len(context_path) - len(blog_path) - 1
        return self._relativeUrl(level, IMAGE_FOLDER, image_id)

    def _rewriteUrl(self, url, blog_path, context_path):
        """Return the replacement for ``url`` or None to leave it unchanged."""
        image_id = url.rsplit('/', 1)[-1]
        # how many levels self.context is under portal root
        # NOTE(review): '- 3' assumes the portal's physical path has two
        # elements (portal directly below the application root) - confirm
        portal_level = len(context_path) - 3

        is_http = url.startswith('http://')
        # skip links with illegal url schema
        if '://' in url and not is_http:
            return None

        if is_http:
            # convert links to known sites to relative ones
            relative_url = None
            for site in SITE_URLS:
                if url.startswith(site):
                    relative_url = url[len(site):].strip('/')
                    break
            if relative_url is None:
                return None
            # passing a default makes a broken link return None
            image = self.portal.unrestrictedTraverse(relative_url, None)
            if image is None:
                return None
            if recurseToInterface(image, IBlog) is not None:
                return self._blogImageUrl(image, image_id, blog_path,
                                          context_path)
            return self._relativeUrl(portal_level, relative_url)

        if url.startswith('/'):
            image = self.portal.unrestrictedTraverse(url.strip('/'), None)
        else:
            image = self.context.unrestrictedTraverse(url, None)
        if image is None:
            # broken link
            return None
        if recurseToInterface(image, IBlog) is not None:
            return self._blogImageUrl(image, image_id, blog_path, context_path)
        if url.startswith('../'):
            # remove '../' from the start of string
            return url[3:]
        if url.startswith('/'):
            # these links didn't work so rewrite them with '..'
            return self._relativeUrl(portal_level, url[1:])
        return None

    def fixImageId(self, image, image_id, blog_path):
        """ Register ``image`` and return the id it is exported under.

        An image whose physical path is already in IMAGE_PATHS keeps the id
        it was registered with.  Otherwise ``image_id`` is made unique with
        getUniqueId and recorded in IMAGE_IDS and IMAGE_PATHS.
        """
        image_path = '/'.join(image.getPhysicalPath())
        if image_path in IMAGE_PATHS:
            # reuse the registered id instead of overwriting it with the raw one
            return IMAGE_PATHS[image_path].rsplit('/', 1)[-1]

        image_id = getUniqueId(image_id)
        IMAGE_IDS.append(image_id)
        # NOTE(review): blog_path[2:] drops the first two path elements,
        # presumably '' and the portal id - confirm
        IMAGE_PATHS[image_path] = '/'.join(blog_path[2:] + (IMAGE_FOLDER, image_id))
        return image_id
	213
class PathRewriter(object):
    """Item manipulator that moves an item into the image folder of its blog.

    The item's path is replaced with ``<blog path>/IMAGE_FOLDER/<id>``, where
    the id is made unique with the IMAGE_IDS / IMAGE_PATHS registries; the
    previous path is stored under the '_oldpath' key.
    """
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """Rewrite the path of ``item`` and return it.

        ``kw['path']`` names the key of ``item`` that holds the path.  The
        item is returned untouched when that keyword is missing or when the
        context is not located inside a blog.
        """
        pathkey = kw.get('path')
        if pathkey is None:
            return item

        path = item[pathkey]
        blog = recurseToInterface(self.context, IBlog)
        if blog is None:
            return item

        blog_path = blog.getPhysicalPath()
        full_path = '/'.join(self.context.getPhysicalPath())
        image_id = path.rsplit('/', 1)[-1]

        if full_path in IMAGE_PATHS:
            # the new path is already registered for this object
            new_path = IMAGE_PATHS[full_path]
            unique_id = new_path.rsplit('/', 1)[-1]
        else:
            unique_id = getUniqueId(image_id)
            # NOTE(review): blog_path[2:] drops the first two path elements,
            # presumably '' and the portal id - confirm
            new_path = '/'.join(blog_path[2:] + (IMAGE_FOLDER, unique_id))
            # register the id that is really used, so that the next image
            # with an equal id doesn't get the same generated id
            IMAGE_IDS.append(unique_id)
            IMAGE_PATHS[full_path] = new_path

        # change item's path
        item[pathkey] = new_path
        item['_oldpath'] = path

        # now we need to fix object id in .marshall.xml; this is also needed
        # when the new id comes from the IMAGE_PATHS registry
        if unique_id != image_id:
            if '_files' in item and 'marshall' in item['_files']:
                marshall = item['_files']['marshall']
                doc = minidom.parseString(marshall['data'])
                id_fields = [field for field in doc.getElementsByTagName('field')
                             if field.getAttribute('name') == 'id']
                if id_fields and id_fields[0].firstChild is not None:
                    id_fields[0].firstChild.nodeValue = '\n\t\t%s\n\t' % unique_id
                    marshall['data'] = doc.toxml('utf-8')

        return item
[533]	258
	259	class ImageFolderSection(object):
	260	""" This section will generate manifest files for image folders in blog.
	261	"""
	262	classProvides(ISectionBlueprint)
	263	implements(ISection)
	264
	265	def __init__(self, transmogrifier, name, options, previous):
	266	self.previous = previous
	267	self.transmogrifier = transmogrifier
	268
[554]	269	self.flagkey = defaultMatcher(options, 'old-path-key', name, 'oldpath')
[533]	270	self.typekey = defaultMatcher(options, 'type-key', name, 'type')
	271	self.pathkey = defaultMatcher(options, 'path-key', name, 'path')
	272
[554]	273
	274	site_urls = options.get('site-urls', '')
	275	site_urls = filter(None, [i.strip() for i in site_urls.splitlines()])
	276	for i in site_urls:
	277	SITE_URLS.append(i)
	278
	279	self.anno = IAnnotations(transmogrifier)
	280
[533]	281	def __iter__(self):
	282	folders = {}
	283
[554]	284	# safely get logging storage
	285	if VALIDATIONKEY in self.anno:
	286	log_storage = self.anno[VALIDATIONKEY]
	287	else:
	288	log_storage = None
	289
[533]	290	for item in self.previous:
[554]	291	item_keys = item.keys()
	292	pathkey = self.pathkey(*item_keys)[0]
	293	typekey = self.typekey(*item_keys)[0]
	294	oldpathkey = self.flagkey(*item_keys)[0]
[533]	295
	296	# collect data about images moved to folders
[554]	297	if pathkey and typekey and oldpathkey:
[533]	298	path = item[pathkey]
[554]	299	old_path = item[oldpathkey]
[533]	300	type_ = item[typekey]
	301	folder_path, image_id = path.rsplit('/', 1)
	302	folders.setdefault(folder_path, []).append((image_id, type_))
	303
[554]	304	# update logging data (path) for this item
	305	if log_storage and log_storage[-1] == old_path:
	306	log_storage.pop()
	307	log_storage.append(path)
	308
[533]	309	yield item
	310
	311	# generate manifests for those image folders
	312	items = []
	313	for folder, entries in folders.items():
	314	items.append({'_entries': entries, pathkey: folder})
	315	exporter = ManifestExporterSection(self.transmogrifier, 'manifest', {'blueprint': 'manifest'}, iter(items))
	316	for item in exporter:
	317	yield item
[554]	318
	319	# clean registries
	320	while IMAGE_IDS: IMAGE_IDS.pop()
	321	while SITE_URLS: SITE_URLS.pop()
	322	IMAGE_PATHS.clear()
[612]	323
	324	class WorkflowImporter(object):
	325	""" This adapter tries to convert all possible workflow histories to
	326	simple_publication_workflow history.
	327	"""
	328	implements(IImportDataCorrector)
	329
	330	def __init__(self, context):
	331	self.context = context
	332
	333	def __call__(self, data):
	334	doc = minidom.parseString(data['data'])
	335	wh = [i for i in doc.getElementsByTagName('cmf:workflow')]
	336	if not wh:
	337	# we don't have such workflow history
	338	return data
	339
	340	wh = wh[0]
	341	workflow_id = wh.getAttribute('id')
	342	if workflow_id == 'simple_publication_workflow':
	343	return data
	344	wh.setAttribute('id', 'simple_publication_workflow')
	345	if workflow_id == 'simpleblog_workflow':
	346	self.fixSimpleBlogWorkflow(wh)
	347	else:
	348	self.fixWorkflow(wh)
	349
	350	data['data'] = doc.toxml('utf-8')
	351	return data
	352
	353	def fixSimpleBlogWorkflow(self, wh):
	354	for history in wh.getElementsByTagName('cmf:history'):
	355	for var in history.getElementsByTagName('cmf:var'):
	356	id_ = var.getAttribute('id')
	357	value = var.getAttribute('value')
	358	if id_ == 'review_state' and value == 'draft':
	359	var.setAttribute('value', 'private')
	360
	361	def fixWorkflow(self, wh):
	362	for history in wh.getElementsByTagName('cmf:history'):
	363	for var in history.getElementsByTagName('cmf:var'):
	364	id_ = var.getAttribute('id')
	365	value = var.getAttribute('value')
	366	if id_ == 'review_state' and value == 'visible':
	367	var.setAttribute('value', 'published')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: