Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

adapters.py @ 1490

Last change on this file since 1490 was 1490, checked in by koval, 14 years ago
renaming all references of quintagroup.transmogrifier.simpleblog2quills
File size: 14.0 KB

Rev	Line
[532]	1	import re
	2	from xml.dom import minidom
	3	from types import ListType
	4	from types import TupleType
	5
[533]	6	from zope.interface import implements, classProvides
[554]	7	from zope.app.annotation.interfaces import IAnnotations
	8
[532]	9	from Products.CMFPlone.Portal import PloneSite
[533]	10	from Products.CMFCore import utils
[532]	11
[533]	12	from collective.transmogrifier.interfaces import ISection, ISectionBlueprint
	13	from collective.transmogrifier.utils import defaultMatcher
	14
[612]	15	from quintagroup.transmogrifier.interfaces import IExportDataCorrector, IImportDataCorrector
[532]	16	from quintagroup.transmogrifier.adapters.exporting import ReferenceExporter
[533]	17	from quintagroup.transmogrifier.manifest import ManifestExporterSection
[554]	18	from quintagroup.transmogrifier.logger import VALIDATIONKEY
[532]	19
[1490]	20	from quintagroup.transmogrify.simpleblog2quills.interfaces import IExportItemManipulator, IBlog
[532]	21
[554]	22	# URLs of the sites where the blog is located (needed to fix links in entries)
	23	SITE_URLS = []
[532]	24	IMAGE_FOLDER = 'images'
[1241]	25	IMAGE_FOLDER_TYPE = 'Large Plone Folder'
[554]	26	# these registries are needed to avoid losing images with equal ids
	27	IMAGE_IDS = []
	28	IMAGE_PATHS = {}
[532]	29
class BlogManifest(object):
    """Export data corrector for the manifest of a blog.

    Removes every manifest record whose type is neither 'BlogEntry' nor
    'BlogFolder' and guarantees that a record for IMAGE_FOLDER is listed.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """Rewrite the manifest XML stored in data['data'] and return data."""
        manifest = minidom.parseString(data['data'])
        root = manifest.documentElement

        # becomes True when a kept record already uses the IMAGE_FOLDER id
        has_image_folder = False
        for record in root.getElementsByTagName('record'):
            if record.getAttribute('type') in ('BlogEntry', 'BlogFolder'):
                if str(record.firstChild.nodeValue.strip()) == IMAGE_FOLDER:
                    has_image_folder = True
            else:
                root.removeChild(record)

        if not has_image_folder:
            # list the images folder so that it appears in the manifest
            image_folder = manifest.createElement('record')
            image_folder.setAttribute('type', IMAGE_FOLDER_TYPE)
            image_folder.appendChild(manifest.createTextNode(IMAGE_FOLDER))
            root.appendChild(image_folder)

        data['data'] = manifest.toxml('utf-8')
        return data
	57
class BlogFolderManifest(object):
    """Export data corrector for the manifest of a blog folder.

    Keeps only the manifest records of type 'BlogEntry' or 'BlogFolder'.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """Filter the manifest XML stored in data['data'] and return data."""
        manifest = minidom.parseString(data['data'])
        root = manifest.documentElement
        unwanted = [record
                    for record in root.getElementsByTagName('record')
                    if record.getAttribute('type') not in ('BlogEntry',
                                                           'BlogFolder')]
        for record in unwanted:
            root.removeChild(record)
        data['data'] = manifest.toxml('utf-8')
        return data
	72
class BlogEntryManifest(object):
    """Item manipulator that strips the manifest file from a blog entry item."""
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """Delete item['_files']['manifest'] when present and return item."""
        # content contained in a BlogEntry isn't exported, so its manifest
        # data must not be written out either
        if '_files' not in item:
            return item
        files = item['_files']
        if 'manifest' in files:
            del files['manifest']
        return item
	84
def recurseToInterface(item, ifaces):
    """Walk up the acquisition parents of `item` until an object providing
    one of `ifaces` is found, and return that object.

    `ifaces` may be a single interface or a list/tuple of interfaces.
    `item` itself is checked first.  None is returned when the walk reaches
    a PloneSite that provides none of the interfaces, or when an object
    without an `aq_parent` is reached.
    """
    if not isinstance(ifaces, (list, tuple)):
        ifaces = [ifaces]

    for iface in ifaces:
        if iface.providedBy(item):
            return item

    while True:
        # objects that are not acquisition-wrapped have no aq_parent; treat
        # that as the end of the chain instead of raising AttributeError
        parent = getattr(item, 'aq_parent', None)
        if parent is None:
            return None
        for iface in ifaces:
            if iface.providedBy(parent):
                return parent
        if isinstance(parent, PloneSite):
            # Stop when we get to the portal root.
            return None
        item = parent
	102
def getUniqueId(image_id):
    """ Return `image_id` if it is not yet in the IMAGE_IDS registry,
        otherwise a variant of it that is not registered.

        The variant is built by inserting a counter (1, 2, ...) before the
        last '.' of the id, or at its end when the id contains no '.'.
    """
    if image_id not in IMAGE_IDS:
        return image_id

    dot = image_id.rfind('.')
    if dot == -1:
        stem, suffix = image_id, ''
    else:
        stem, suffix = image_id[:dot], image_id[dot:]

    counter = 0
    while True:
        counter += 1
        candidate = stem + str(counter) + suffix
        if candidate not in IMAGE_IDS:
            return candidate
	120
class BlogEntryExporter(ReferenceExporter):
    """Export data corrector that rewrites ``src="..."`` links found in the
    'body' field of a blog entry.

    The data is first processed by the ReferenceExporter base class.  Links
    to objects stored inside a blog are then pointed at the blog's
    IMAGE_FOLDER (the image is recorded in IMAGE_IDS / IMAGE_PATHS); links
    to other resolvable objects are rewritten with leading '..' segments.
    Links that cannot be traversed are left untouched.
    """
    implements(IExportDataCorrector)

    SRC = re.compile(r'src="([^"]+)"')

    def __init__(self, context):
        self.context = context
        self.portal_url = utils.getToolByName(self.context, 'portal_url')
        self.portal = self.portal_url.getPortalObject()

    def __call__(self, data):
        """Return `data` with the links in its 'body' field rewritten.

        `data` is returned as produced by the base class when there is no
        'body' field, when the field is empty, or when the context is not
        located inside a blog.
        """
        data = super(BlogEntryExporter, self).__call__(data)
        doc = minidom.parseString(data['data'])
        body_fields = [field for field in doc.getElementsByTagName('field')
                       if field.getAttribute('name') == 'body']
        if not body_fields:
            return data
        elem = body_fields[0]
        if elem.firstChild is None:
            # empty body: there is nothing to rewrite
            return data

        blog = recurseToInterface(self.context, IBlog)
        if blog is None:
            # not inside a blog: the relative levels can't be computed
            return data

        text = elem.firstChild.nodeValue
        blog_path = blog.getPhysicalPath()
        context_path = self.context.getPhysicalPath()
        for url in self.SRC.findall(text):
            url = str(url)
            # skip links with illegal url schema
            if '://' in url and not url.startswith('http://'):
                continue
            if url.startswith('http://'):
                text = self._rewriteSiteUrl(text, url, blog_path,
                                            context_path)
            else:
                text = self._rewriteLocalUrl(text, url, blog_path,
                                             context_path)

        elem.firstChild.nodeValue = text
        data['data'] = doc.toxml('utf-8')
        return data

    def _imageFolderUrl(self, image, url, blog_path, context_path):
        """Register `image` and return the relative link to its new place
        in the blog's IMAGE_FOLDER.
        """
        image_id = self.fixImageId(image, url.rsplit('/', 1)[-1], blog_path)
        level = len(context_path) - len(blog_path) - 1
        # join all segments at once: with level == 0 this yields
        # 'images/<id>' and not an absolute '/images/<id>'
        return '/'.join(['..'] * level + [IMAGE_FOLDER, image_id])

    def _rewriteSiteUrl(self, text, url, blog_path, context_path):
        """Rewrite an 'http://' link that starts with one of SITE_URLS.

        Only the first matching site url is considered; `text` is returned
        unchanged when no site matches or the link is broken.
        """
        for site in SITE_URLS:
            if not url.startswith(site):
                continue
            relative_url = url[len(site):].strip('/')
            # if link is broken we'll get an AttributeError
            try:
                image = self.portal.unrestrictedTraverse(relative_url)
            except AttributeError:
                return text
            if recurseToInterface(image, IBlog) is not None:
                new_url = self._imageFolderUrl(image, url, blog_path,
                                               context_path)
            else:
                # find how many levels self.context is under portal root
                level = len(context_path) - 3
                new_url = '/'.join(['..'] * level) + '/' + relative_url
            return text.replace(url, new_url, 1)
        return text

    def _rewriteLocalUrl(self, text, url, blog_path, context_path):
        """Rewrite a link without schema: '/'-rooted links are resolved from
        the portal, all other links from the context.
        """
        if url.startswith('/'):
            base, target = self.portal, url.strip('/')
        else:
            base, target = self.context, url
        # if link is broken we'll get an AttributeError
        try:
            image = base.unrestrictedTraverse(target)
        except AttributeError:
            return text

        if recurseToInterface(image, IBlog) is not None:
            new_url = self._imageFolderUrl(image, url, blog_path,
                                           context_path)
        elif url.startswith('../'):
            # remove '../' from the start of string
            new_url = url[3:]
        elif url.startswith('/'):
            # these links didn't work so rewrite them with '..'
            # find how many levels self.context is under portal root
            level = len(context_path) - 3
            new_url = '/'.join(['..'] * level) + url
        else:
            return text
        return text.replace(url, new_url, 1)

    def fixImageId(self, image, image_id, blog_path):
        """ Return the id under which `image` is stored in the IMAGE_FOLDER.

            An image whose physical path is already in IMAGE_PATHS keeps the
            id it was registered with.  Otherwise an id that is unique in
            IMAGE_IDS is chosen, and the image is recorded in both
            registries.
        """
        image_path = '/'.join(image.getPhysicalPath())
        if image_path in IMAGE_PATHS:
            # reuse the registered id, which may differ from `image_id` when
            # the image had to be renamed on its first registration
            return IMAGE_PATHS[image_path].rsplit('/', 1)[-1]

        image_id = getUniqueId(image_id)
        IMAGE_IDS.append(image_id)
        IMAGE_PATHS[image_path] = '/'.join(
            blog_path[2:] + (IMAGE_FOLDER, image_id))
        return image_id
	223
class PathRewriter(object):
    """Item manipulator that moves an item located inside a blog into the
    blog's IMAGE_FOLDER by rewriting the item's path.

    The old path is kept in item['_oldpath'].  When the item gets a new id,
    the 'id' field of its marshall file is updated to match.
    """
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """Rewrite item[kw['path']] and return item.

        The item is returned untouched when no 'path' keyword is given or
        when the context is not located inside a blog.
        """
        pathkey = kw.get('path')
        if pathkey is None:
            return item

        path = item[pathkey]
        blog = recurseToInterface(self.context, IBlog)
        if blog is None:
            return item

        blog_path = blog.getPhysicalPath()
        full_path = '/'.join(self.context.getPhysicalPath())
        image_id = path.rsplit('/', 1)[-1]

        if full_path in IMAGE_PATHS:
            # already registered; the id stored there may have been renamed
            new_path = IMAGE_PATHS[full_path]
            unique_id = new_path.rsplit('/', 1)[-1]
        else:
            unique_id = getUniqueId(image_id)
            new_path = '/'.join(blog_path[2:] + (IMAGE_FOLDER, unique_id))
            # reserve the id that is really used, otherwise the next
            # collision would generate the same id again
            IMAGE_IDS.append(unique_id)
            IMAGE_PATHS[full_path] = new_path
        modified = image_id != unique_id

        # change item's path
        item[pathkey] = new_path
        item['_oldpath'] = path

        # now we need to fix object id in .marshall.xml
        if modified and '_files' in item and 'marshall' in item['_files']:
            marshall = item['_files']['marshall']
            doc = minidom.parseString(marshall['data'])
            id_fields = [field for field in doc.getElementsByTagName('field')
                         if field.getAttribute('name') == 'id']
            if id_fields and id_fields[0].firstChild is not None:
                id_fields[0].firstChild.nodeValue = '\n\t\t%s\n\t' % unique_id
                marshall['data'] = doc.toxml('utf-8')

        return item
[533]	268
	269	class ImageFolderSection(object):
	270	""" This section will generate manifest files for image folders in blog.
	271	"""
	272	classProvides(ISectionBlueprint)
	273	implements(ISection)
	274
	275	def __init__(self, transmogrifier, name, options, previous):
	276	self.previous = previous
	277	self.transmogrifier = transmogrifier
	278
[554]	279	self.flagkey = defaultMatcher(options, 'old-path-key', name, 'oldpath')
[533]	280	self.typekey = defaultMatcher(options, 'type-key', name, 'type')
	281	self.pathkey = defaultMatcher(options, 'path-key', name, 'path')
	282
[554]	283
	284	site_urls = options.get('site-urls', '')
	285	site_urls = filter(None, [i.strip() for i in site_urls.splitlines()])
	286	for i in site_urls:
	287	SITE_URLS.append(i)
	288
	289	self.anno = IAnnotations(transmogrifier)
	290
[533]	291	def __iter__(self):
	292	folders = {}
	293
[554]	294	# safely get logging storage
	295	if VALIDATIONKEY in self.anno:
	296	log_storage = self.anno[VALIDATIONKEY]
	297	else:
	298	log_storage = None
	299
[533]	300	for item in self.previous:
[554]	301	item_keys = item.keys()
	302	pathkey = self.pathkey(*item_keys)[0]
	303	typekey = self.typekey(*item_keys)[0]
	304	oldpathkey = self.flagkey(*item_keys)[0]
[533]	305
	306	# collect data about images moved to folders
[554]	307	if pathkey and typekey and oldpathkey:
[533]	308	path = item[pathkey]
[554]	309	old_path = item[oldpathkey]
[533]	310	type_ = item[typekey]
	311	folder_path, image_id = path.rsplit('/', 1)
	312	folders.setdefault(folder_path, []).append((image_id, type_))
	313
[554]	314	# update logging data (path) for this item
	315	if log_storage and log_storage[-1] == old_path:
	316	log_storage.pop()
	317	log_storage.append(path)
	318
[533]	319	yield item
	320
	321	# generate manifests for those image folders
	322	items = []
	323	for folder, entries in folders.items():
	324	items.append({'_entries': entries, pathkey: folder})
	325	exporter = ManifestExporterSection(self.transmogrifier, 'manifest', {'blueprint': 'manifest'}, iter(items))
	326	for item in exporter:
	327	yield item
[554]	328
	329	# clean registries
	330	while IMAGE_IDS: IMAGE_IDS.pop()
	331	while SITE_URLS: SITE_URLS.pop()
	332	IMAGE_PATHS.clear()
[612]	333
	334	class WorkflowImporter(object):
	335	""" This adapter tries to convert all possible workflow histories to
	336	simple_publication_workflow history.
	337	"""
	338	implements(IImportDataCorrector)
	339
	340	def __init__(self, context):
	341	self.context = context
	342
	343	def __call__(self, data):
	344	doc = minidom.parseString(data['data'])
	345	wh = [i for i in doc.getElementsByTagName('cmf:workflow')]
	346	if not wh:
	347	# we don't have such workflow history
	348	return data
	349
	350	wh = wh[0]
	351	workflow_id = wh.getAttribute('id')
	352	if workflow_id == 'simple_publication_workflow':
	353	return data
	354	wh.setAttribute('id', 'simple_publication_workflow')
	355	if workflow_id == 'simpleblog_workflow':
	356	self.fixSimpleBlogWorkflow(wh)
	357	else:
	358	self.fixWorkflow(wh)
	359
	360	data['data'] = doc.toxml('utf-8')
	361	return data
	362
	363	def fixSimpleBlogWorkflow(self, wh):
	364	for history in wh.getElementsByTagName('cmf:history'):
	365	for var in history.getElementsByTagName('cmf:var'):
	366	id_ = var.getAttribute('id')
	367	value = var.getAttribute('value')
	368	if id_ == 'review_state' and value == 'draft':
	369	var.setAttribute('value', 'private')
	370
	371	def fixWorkflow(self, wh):
	372	for history in wh.getElementsByTagName('cmf:history'):
	373	for var in history.getElementsByTagName('cmf:var'):
	374	id_ = var.getAttribute('id')
	375	value = var.getAttribute('value')
	376	if id_ == 'review_state' and value == 'visible':
	377	var.setAttribute('value', 'published')

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: products/quintagroup.transmogrify.simpleblog2quills/trunk/quintagroup/transmogrify/simpleblog2quills/adapters.py @ 1490

Download in other formats: