import re
from xml.dom import minidom
from types import ListType
from types import TupleType

from zope.interface import implements, classProvides
from zope.app.annotation.interfaces import IAnnotations

from Products.CMFPlone.Portal import PloneSite
from Products.CMFCore import utils

from collective.transmogrifier.interfaces import ISection, ISectionBlueprint
from collective.transmogrifier.utils import defaultMatcher

from quintagroup.transmogrifier.interfaces import IExportDataCorrector, IImportDataCorrector
from quintagroup.transmogrifier.adapters.exporting import ReferenceExporter
from quintagroup.transmogrifier.manifest import ManifestExporterSection
from quintagroup.transmogrifier.logger import VALIDATIONKEY

from quintagroup.transmogrify.simpleblog2quills.interfaces import IExportItemManipulator, IBlog

# URLs of the sites where the blog is located (needed to fix absolute links
# in entries); filled by ImageFolderSection from its 'site-urls' option
SITE_URLS = []
# id and portal type of the folder that collects the images of a blog
IMAGE_FOLDER = 'images'
IMAGE_FOLDER_TYPE = 'Large Plone Folder'
# these registries are needed to avoid losing images with equal ids:
# IMAGE_IDS holds the image ids that are already in use, IMAGE_PATHS maps
# the original physical path of an image to its new path in the image folder
IMAGE_IDS = []
IMAGE_PATHS = {}

class BlogManifest(object):
    """ Export corrector for the manifest of a blog.

    Only 'BlogEntry' and 'BlogFolder' records are kept in the manifest and
    a record for the IMAGE_FOLDER folder is appended unless one of the kept
    records already uses that id.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """ Rewrite the manifest XML stored in data['data'].

        Returns the same `data` mapping with data['data'] replaced by the
        corrected, UTF-8 encoded XML.
        """
        # flag that indicates whether 'images' folder must be added to manifest
        need_to_add = True

        doc = minidom.parseString(data['data'])
        root = doc.documentElement
        for child in root.getElementsByTagName('record'):
            if child.getAttribute('type') not in ('BlogEntry', 'BlogFolder'):
                root.removeChild(child)
                continue
            # an empty record has no text node to read the id from
            first = child.firstChild
            if first is None or first.nodeValue is None:
                continue
            # compare the unicode value directly: str() would raise on
            # ids that contain non-ASCII characters
            if first.nodeValue.strip() == IMAGE_FOLDER:
                # blog already contains object with IMAGE_FOLDER id
                need_to_add = False

        if need_to_add:
            folder = doc.createElement('record')
            folder.setAttribute('type', IMAGE_FOLDER_TYPE)
            folder.appendChild(doc.createTextNode(IMAGE_FOLDER))
            root.appendChild(folder)

        data['data'] = doc.toxml('utf-8')
        return data

class BlogFolderManifest(object):
    """ Export corrector for the manifest of a blog folder.

    Drops every manifest record whose type is neither 'BlogEntry' nor
    'BlogFolder'.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """ Filter the manifest XML in data['data'] and return `data`. """
        kept_types = ('BlogEntry', 'BlogFolder')
        document = minidom.parseString(data['data'])
        manifest = document.documentElement

        # collect first, then remove, so the record list is walked only once
        unwanted = [record
                    for record in manifest.getElementsByTagName('record')
                    if record.getAttribute('type') not in kept_types]
        for record in unwanted:
            manifest.removeChild(record)

        data['data'] = document.toxml('utf-8')
        return data

class BlogEntryManifest(object):
    """ Item manipulator that strips the manifest file from a blog entry.

    Content contained in a BlogEntry isn't exported, so its manifest is
    not needed.
    """
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """ Remove item['_files']['manifest'] if present; return `item`. """
        if '_files' not in item:
            return item
        files = item['_files']
        if 'manifest' in files:
            del files['manifest']
        return item

def recurseToInterface(item, ifaces):
    """Walk up the acquisition parents of `item` until an object providing
    one of `ifaces` is found, and return that object.

    `ifaces` may be a single interface or a list/tuple of interfaces.
    `item` itself is checked before its parent. None is returned when the
    portal root (a PloneSite) is reached without a match.
    """
    if not isinstance(ifaces, (ListType, TupleType)):
        ifaces = [ifaces]

    current = item
    while True:
        container = current.aq_parent
        # the current object takes precedence over its container
        for candidate in (current, container):
            for iface in ifaces:
                if iface.providedBy(candidate):
                    return candidate
        if isinstance(container, PloneSite):
            # Stop when we get to the portal root.
            return None
        current = container

def getUniqueId(image_id, registry=None):
    """ Generate id that is unique in a registry of used ids.

    `image_id` is returned unchanged when it is not in the registry yet.
    Otherwise a counter (1, 2, ...) is inserted before the last extension
    (or appended when there is no extension) until the id is unused.

    `registry` is any container supporting `in`; it defaults to the
    module-level IMAGE_IDS registry. The registry is never modified here -
    registering the returned id is up to the caller.
    """
    if registry is None:
        registry = IMAGE_IDS

    if image_id not in registry:
        return image_id

    # split off the extension so that the counter goes before it
    if '.' in image_id:
        name, ext = image_id.rsplit('.', 1)
        ext = '.' + ext
    else:
        name, ext = image_id, ''

    c = 1
    new_id = name + str(c) + ext
    while new_id in registry:
        c += 1
        new_id = name + str(c) + ext
    return new_id

class BlogEntryExporter(ReferenceExporter):
    """ Export corrector that rewrites image links in the body of an entry.

    Every src="..." value found in the 'body' field is inspected:

    * links to images stored inside the blog are pointed to the
      IMAGE_FOLDER folder of the blog (the image gets registered in
      IMAGE_IDS / IMAGE_PATHS);
    * other links that can be resolved are rewritten as relative links;
    * broken links and links with a schema other than http:// are left
      untouched.
    """
    implements(IExportDataCorrector)

    # matches the value of every src="..." attribute
    SRC = re.compile(r'src="([^"]+)"')

    def __init__(self, context):
        self.context = context
        self.portal_url = utils.getToolByName(self.context, 'portal_url')
        self.portal = self.portal_url.getPortalObject()

    def __call__(self, data):
        """ Fix the links in the XML stored in data['data'].

        Returns `data`; data['data'] is replaced by the corrected, UTF-8
        encoded XML unless there is no body text to process.
        """
        data = super(BlogEntryExporter, self).__call__(data)
        doc = minidom.parseString(data['data'])
        try:
            elem = [i for i in doc.getElementsByTagName('field') if i.getAttribute('name') == 'body'][0]
        except IndexError:
            return data

        # an empty body field has no text node, so there is nothing to fix
        if elem.firstChild is None or elem.firstChild.nodeValue is None:
            return data

        text = elem.firstChild.nodeValue
        urls = self.SRC.findall(text)
        blog = recurseToInterface(self.context, IBlog)
        blog_path = blog.getPhysicalPath()
        context_path = self.context.getPhysicalPath()
        for url in urls:
            url = str(url)
            image_id = url.rsplit('/', 1)[-1]
            # skip links with illegal url schema
            if '://' in url and not url.startswith('http://'):
                continue
            # convert all links to relative
            if url.startswith('http://'):
                new_url = self._rewriteAbsoluteUrl(url, image_id, blog_path, context_path)
            else:
                new_url = self._rewriteLocalUrl(url, image_id, blog_path, context_path)
            if new_url is not None:
                text = text.replace(url, new_url, 1)

        elem.firstChild.nodeValue = text
        data['data'] = doc.toxml('utf-8')
        return data

    def _rewriteAbsoluteUrl(self, url, image_id, blog_path, context_path):
        """ Return the replacement for an http:// link or None to keep it.

        Only links that start with one of SITE_URLS are rewritten; the
        first matching site decides the result.
        """
        for site in SITE_URLS:
            if not url.startswith(site):
                continue
            relative_url = url[len(site):].strip('/')
            # if link is broken we'll get an AttributeError
            try:
                image = self.portal.unrestrictedTraverse(relative_url)
            except AttributeError:
                return None
            # check whether image is stored in blog
            if recurseToInterface(image, IBlog) is not None:
                return self._blogImageUrl(image, image_id, blog_path, context_path)
            return self._portalPrefix(context_path) + '/' + relative_url
        return None

    def _rewriteLocalUrl(self, url, image_id, blog_path, context_path):
        """ Return the replacement for a schema-less link or None to keep it.

        Links starting with '/' are resolved from the portal, all others
        from the exported entry.
        """
        if url.startswith('/'):
            start, path = self.portal, url.strip('/')
        else:
            start, path = self.context, url
        # if link is broken we'll get an AttributeError
        try:
            image = start.unrestrictedTraverse(path)
        except AttributeError:
            return None

        if recurseToInterface(image, IBlog) is not None:
            return self._blogImageUrl(image, image_id, blog_path, context_path)
        if url.startswith('../'):
            # remove '../' from the start of string
            return url[3:]
        if url.startswith('/'):
            # these links didn't work so rewrite them with '..'
            return self._portalPrefix(context_path) + url
        return None

    def _blogImageUrl(self, image, image_id, blog_path, context_path):
        """ Register `image` and return the relative link to its new place
        in the IMAGE_FOLDER folder of the blog.
        """
        image_id = self.fixImageId(image, image_id, blog_path)
        # number of containers between the blog and the entry
        level = len(context_path) - len(blog_path) - 1
        # the parts are joined in one go: for an entry placed directly in
        # the blog (level 0) this gives 'images/<id>' and not the absolute
        # '/images/<id>'
        return '/'.join(['..'] * level + [IMAGE_FOLDER, image_id])

    def _portalPrefix(self, context_path):
        """ Return the '../..' prefix leading from the entry to the portal.
        """
        # find how many levels self.context is under portal root
        # NOTE(review): the constant 3 presumably assumes that the portal
        # sits directly under the Zope root - confirm
        level = len(context_path) - 3
        return '/'.join(['..'] * level)

    def fixImageId(self, image, image_id, blog_path):
        """ Return the id `image` will have in the image folder.

        An image that was registered before keeps the id recorded for it
        in IMAGE_PATHS. A new image is registered in IMAGE_IDS and
        IMAGE_PATHS, under a generated id if `image_id` is already taken
        by another image.
        """
        image_path = '/'.join(image.getPhysicalPath())
        if image_path in IMAGE_PATHS:
            # the id may have been changed at registration time, so it is
            # taken from the recorded path instead of the link
            return IMAGE_PATHS[image_path].rsplit('/', 1)[-1]

        image_id = getUniqueId(image_id)
        IMAGE_IDS.append(image_id)
        IMAGE_PATHS[image_path] = '/'.join(blog_path[2:] + (IMAGE_FOLDER, image_id))
        return image_id

class PathRewriter(object):
    """ Item manipulator that moves an object of a blog to the
    IMAGE_FOLDER folder of that blog.

    The new path is recorded in IMAGE_PATHS (keyed by the physical path of
    the context) and the id in use is recorded in IMAGE_IDS, so that
    objects with equal ids don't overwrite each other.
    """
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """ Rewrite the path of `item` and return it.

        kw['path'] is the key under which the path is stored in `item`;
        the item is returned untouched when that keyword is missing or
        when the context is not located in a blog. The previous path is
        kept in item['_oldpath'].
        """
        pathkey = kw.get('path')
        if pathkey is None:
            return item

        path = item[pathkey]
        blog = recurseToInterface(self.context, IBlog)
        if blog is None:
            return item

        full_path = '/'.join(self.context.getPhysicalPath())
        image_id = path.rsplit('/', 1)[-1]

        if full_path in IMAGE_PATHS:
            # already registered (possibly under a generated id)
            new_path = IMAGE_PATHS[full_path]
            unique_id = new_path.rsplit('/', 1)[-1]
        else:
            blog_path = blog.getPhysicalPath()
            unique_id = getUniqueId(image_id)
            new_path = '/'.join(blog_path[2:] + (IMAGE_FOLDER, unique_id))

            # register the id that is really used, otherwise the next
            # object with the same id would get the same generated id
            IMAGE_IDS.append(unique_id)
            IMAGE_PATHS[full_path] = new_path

        modified = image_id != unique_id

        # change item's path
        item[pathkey] = new_path
        item['_oldpath'] = path

        # now we need to fix object id in .marshall.xml
        if modified and '_files' in item and 'marshall' in item['_files']:
            marshall = item['_files']['marshall']
            doc = minidom.parseString(marshall['data'])
            id_fields = [i for i in doc.getElementsByTagName('field') if i.getAttribute('name') == 'id']
            # leave the file alone when it has no id field to update
            if id_fields and id_fields[0].firstChild is not None:
                id_fields[0].firstChild.nodeValue = '\n\t\t%s\n\t' % unique_id
                marshall['data'] = doc.toxml('utf-8')

        return item

class ImageFolderSection(object):
    """ This section will generate manifest files for image folders in blog.

    Options read in addition to the key matchers ('old-path-key',
    'type-key', 'path-key'):

    site-urls
        site URLs, one per line; they are stored in SITE_URLS.
    """
    classProvides(ISectionBlueprint)
    implements(ISection)

    def __init__(self, transmogrifier, name, options, previous):
        self.previous = previous
        self.transmogrifier = transmogrifier

        self.flagkey = defaultMatcher(options, 'old-path-key', name, 'oldpath')
        self.typekey = defaultMatcher(options, 'type-key', name, 'type')
        self.pathkey = defaultMatcher(options, 'path-key', name, 'path')

        # one URL per line, blank lines are ignored
        site_urls = [i.strip() for i in options.get('site-urls', '').splitlines()]
        SITE_URLS.extend([i for i in site_urls if i])

        self.anno = IAnnotations(transmogrifier)

    def __iter__(self):
        """ Yield all items of the previous section, then one manifest item
        per folder that received moved objects, and finally empty the
        module-level registries.
        """
        # new folder path -> list of (id, type) of the objects moved there
        folders = {}
        # path key of the moved items; reused for the generated manifest
        # items instead of whatever key the last pipeline item happened
        # to have
        manifest_pathkey = None

        # safely get logging storage
        if VALIDATIONKEY in self.anno:
            log_storage = self.anno[VALIDATIONKEY]
        else:
            log_storage = None

        for item in self.previous:
            item_keys = item.keys()
            pathkey = self.pathkey(*item_keys)[0]
            typekey = self.typekey(*item_keys)[0]
            oldpathkey = self.flagkey(*item_keys)[0]

            # collect data about images moved to folders
            if pathkey and typekey and oldpathkey:
                manifest_pathkey = pathkey
                path = item[pathkey]
                old_path = item[oldpathkey]
                type_ = item[typekey]
                folder_path, image_id = path.rsplit('/', 1)
                folders.setdefault(folder_path, []).append((image_id, type_))

                # update logging data (path) for this item
                if log_storage and log_storage[-1] == old_path:
                    log_storage.pop()
                    log_storage.append(path)

            yield item

        # generate manifests for those image folders
        items = [{'_entries': entries, manifest_pathkey: folder}
                 for folder, entries in folders.items()]
        exporter = ManifestExporterSection(self.transmogrifier, 'manifest', {'blueprint': 'manifest'}, iter(items))
        for item in exporter:
            yield item

        # clean registries in place (other code holds references to them)
        # NOTE(review): this is skipped when the iteration is aborted;
        # a try/finally around the yields would need Python 2.5+ - confirm
        # the supported Python version before adding it
        del IMAGE_IDS[:]
        del SITE_URLS[:]
        IMAGE_PATHS.clear()

class WorkflowImporter(object):
    """ This adapter tries to convert all possible workflow histories to
        simple_publication_workflow history.
    """
    implements(IImportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """ Rename the first 'cmf:workflow' element of the XML in
        data['data'] to simple_publication_workflow and translate its
        review states; return `data`.
        """
        document = minidom.parseString(data['data'])
        workflows = list(document.getElementsByTagName('cmf:workflow'))
        if len(workflows) == 0:
            # we don't have such workflow history
            return data

        workflow = workflows[0]
        old_id = workflow.getAttribute('id')
        if old_id != 'simple_publication_workflow':
            workflow.setAttribute('id', 'simple_publication_workflow')
            if old_id == 'simpleblog_workflow':
                self.fixSimpleBlogWorkflow(workflow)
            else:
                self.fixWorkflow(workflow)
            data['data'] = document.toxml('utf-8')

        return data

    def _replaceReviewState(self, wh, old_state, new_state):
        """ Set every 'review_state' variable of the history entries of
        `wh` that has the value `old_state` to `new_state`.
        """
        for history in wh.getElementsByTagName('cmf:history'):
            for var in history.getElementsByTagName('cmf:var'):
                if var.getAttribute('id') != 'review_state':
                    continue
                if var.getAttribute('value') == old_state:
                    var.setAttribute('value', new_state)

    def fixSimpleBlogWorkflow(self, wh):
        """ simpleblog_workflow: state 'draft' becomes 'private'. """
        self._replaceReviewState(wh, 'draft', 'private')

    def fixWorkflow(self, wh):
        """ Any other workflow: state 'visible' becomes 'published'. """
        self._replaceReviewState(wh, 'visible', 'published')