| 1 | from collective.imstransport.utilities.imsinterchange import IMSReader |
|---|
| 2 | from collective.imstransport.utilities.packagingio import ZipfileReader |
|---|
| 3 | from collective.imstransport.utilities.bb.bbreader import BBReader |
|---|
| 4 | from zope.component import getUtility |
|---|
| 5 | from collective.imstransport.utilities.interfaces import IIMSObjectCreator |
|---|
| 6 | from xml.parsers.expat import ExpatError |
|---|
| 7 | from collective.imstransport.IMS_exceptions import ManifestError |
|---|
| 8 | from BeautifulSoup import BeautifulSoup |
|---|
| 9 | from Products.CMFCore.interfaces import ISiteRoot |
|---|
| 10 | import htmlentitydefs |
|---|
| 11 | from configbb import EMBEDDED_STRING |
|---|
| 12 | import re |
|---|
| 13 | |
|---|
class IMSBBReader(IMSReader):
    """ Create objects from the IMS manifest of a Blackboard (bb) package. """

    def readPackage(self, file, context):
        """ Read the manifest and create the objects it describes.

        file is handed unchanged to ZipfileReader; context is passed through
        to the IIMSObjectCreator utility together with the collected
        metadata and the archive reader.

        For every manifest resource a metadata dictionary is stored in
        objDict, keyed by the resource id (attached files are keyed by
        resource id + file name). Resources collected in tocpages get a
        generated table of contents prepended to their text afterwards.

        Returns a (False, message) tuple when the archive reader is falsy;
        otherwise nothing is returned explicitly.

        Raises ManifestError when the archive holds no manifest or when
        parsing the manifest raises an ExpatError.
        """
        source = ZipfileReader(file)
        objDict = {}
        if not source:
            return False, 'Internal error. No source object specified'
        bbreader = BBReader()
        manifest = source.readManifest()
        if not manifest:
            raise ManifestError('Could not locate manifest file "imsmanifest.xml" in the zip archive.')
        try:
            doc = bbreader.parseManifest(manifest)
        except ExpatError as e:
            raise ManifestError(str(e))
        tocpages = []
        orgs = bbreader.readOrganizations(doc)
        resources = bbreader.readResources(doc)
        for x in resources:
            resid, restype, bbfile, bbtitle, bbase = bbreader.readResourceAttributes(x)
            doctext = filetoc = ''
            metadata = {}
            if restype == 'resource/x-bb-document':
                # read the data file
                if bbfile:
                    dataxml = source.readFile(bbfile)
                    resnode = bbreader.parseDataFile(dataxml)
                    metadata = bbreader.readMetadata(resnode)
                    if metadata.get('text'):
                        mtext = metadata['text']
                        # unquotehtml and BeautifulSoup are fed a byte string
                        if isinstance(mtext, unicode):
                            mtext = mtext.encode('utf-8')
                        ptext = unquotehtml(mtext)
                        utils = getUtility(ISiteRoot).plone_utils
                        soup = BeautifulSoup(ptext)
                        doctext = bbreader.runDocumentFilters(utils, soup, [(EMBEDDED_STRING, 'embedded'),], bbase)
                # Handle Files
                # NOTE(review): the indentation of this code was reconstructed;
                # the file handling is assumed to belong to the
                # 'resource/x-bb-document' branch -- confirm.
                files = bbreader.readFiles(x, bbase)
                entries = []
                for y in files:
                    dhash = resid + y
                    objDict[dhash] = {}
                    dexcludeFromNav = True
                    dfile = y
                    # File is embedded
                    if 'embedded' in y:
                        # Link is encrypted: the last path component starts
                        # with '!'. startswith() also copes with a path that
                        # ends in '/', where that component is empty.
                        if y.split('/')[-1].startswith('!') and doctext:
                            soup = BeautifulSoup(doctext)
                            embeddedpath = bbreader.readEmbeddedTags(soup)
                            if embeddedpath:
                                dfile = embeddedpath
                    dfilepath = self.createPathFromFile(dfile)
                    did = self.createIdFromFile(dfile)
                    dtype = self.determineType(objDict[dhash], dfile)
                    portal = getUtility(ISiteRoot)
                    utils = portal.plone_utils
                    did = utils.normalizeString(did)
                    dtitle = did
                    if dfilepath:
                        linkpath = '%s/%s' % (dfilepath, did)
                    else:
                        linkpath = did
                    # When the target folder holds exactly one file, that
                    # file is used as the binary instead of the manifest name.
                    binfile = y
                    folder_files = source.getFolderFiles(dfilepath)
                    if len(folder_files) == 1:
                        binfile = folder_files[0]
                    entries.append((linkpath, dtitle))
                    self.applyCoreMetadata(objDict[dhash], did, dfilepath, dexcludeFromNav, dtype, dtitle, file=binfile)
                if entries:
                    filetoc = bbreader.createTocPage(entries)
            # Handle links
            if metadata.get('bbtype') == 'Link':
                objDict[resid] = metadata
                linkid = resid + '.link'
                if resid in orgs:
                    title = orgs[resid]
                else:
                    title = linkid
                self.applyCoreMetadata(objDict[resid], linkid, '', True, metadata['bbtype'], title)
            elif restype in ['resource/x-bb-document', 'course/x-bb-coursetoc']:
                # Handle normal bb-documents
                objDict[resid] = metadata
                excludeFromNav = True
                docid = resid + '.html'
                if resid in orgs:
                    title = orgs[resid]
                else:
                    title = docid
                # It's a folder object:
                isFolder = metadata.get('bbtype') == 'Folder'
                if isFolder or restype == 'course/x-bb-coursetoc':
                    tocpages.append(resid)
                # It's a table of contents object
                if restype == 'course/x-bb-coursetoc':
                    if resid in orgs:
                        excludeFromNav = False
                        orgstitle = orgs[resid].split('.')
                        # Rewrite label tag: take the second dotted part and
                        # put a space in front of the capitals that start a word
                        if len(orgstitle) > 1 and orgstitle[-1] == 'label':
                            title = re.sub('(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))', ' \\1', orgstitle[1])
                text = doctext
                # Only a unicode file listing is appended (empty string otherwise)
                if isinstance(filetoc, unicode):
                    text += filetoc.encode('utf-8')
                self.applyCoreMetadata(objDict[resid], docid, '', excludeFromNav, 'Document', title, text=text)
        # Build table of contents pages
        for z in tocpages:
            tocitems = bbreader.readTocItem(doc, z)
            entries = []
            for titem in tocitems:
                if titem in objDict:
                    met = objDict[titem]
                    path = met['path']
                    if path:
                        linkpath = '%s/%s' % (path, met['id'])
                    else:
                        linkpath = met['id']
                    entries.append((linkpath, met['title']))
            if entries:
                tocpage = bbreader.createTocPage(entries).encode('utf-8')
                gtext = objDict[z].get('text')
                if gtext:
                    if isinstance(gtext, unicode):
                        gtext = gtext.encode('utf-8')
                    # the generated listing goes in front of the existing text
                    objDict[z]['text'] = tocpage + gtext
                else:
                    objDict[z]['text'] = tocpage
            else:
                # nothing to list: keep the page out of the navigation
                objDict[z]['excludeFromNav'] = True
        objcreator = getUtility(IIMSObjectCreator)
        objcreator.createObjects(objDict, context, source)

    def applyCoreMetadata(self, metadata, id, path, excludeFromNav, type, title, file=None, text=None):
        """ Helper function for applying metadata.

        Fills the metadata dictionary in place: 'id' (normalized with
        plone_utils.normalizeString), 'path', 'excludeFromNav' and 'type'
        are always overwritten; 'title' is only set when the dictionary has
        no non-empty title yet; 'file' and 'text' are only set when a truthy
        value is passed.
        """
        portal = getUtility(ISiteRoot)
        utils = portal.plone_utils
        metadata['id'] = utils.normalizeString(id)
        metadata['path'] = path
        metadata['excludeFromNav'] = excludeFromNav
        metadata['type'] = type
        if not metadata.get('title'):
            metadata['title'] = title
        if file:
            metadata['file'] = file
        if text:
            metadata['text'] = text
|---|
| 170 | |
|---|
def convertentity(m):
    """Convert a HTML entity into normal string (ISO-8859-1).

    m is a regular-expression match whose group 1 is '#' for a numeric
    character reference (empty otherwise) and whose group 2 is the text
    between the '&' (or '&#') and the closing ';'.

    Decimal (&#65;) and hexadecimal (&#x41;) references are converted with
    chr(); named entities are looked up in htmlentitydefs.entitydefs.
    Whatever cannot be converted is returned as the original entity text.
    """
    body = m.group(2)
    if m.group(1) == '#':
        try:
            if body[:1] in ('x', 'X'):
                # hexadecimal reference: digits follow the 'x'
                return chr(int(body[1:], 16))
            return chr(int(body))
        except (ValueError, OverflowError):
            # not a number, or a code chr() cannot represent (an oversized
            # number raises OverflowError rather than ValueError)
            return '&#%s;' % body
    try:
        return htmlentitydefs.entitydefs[body]
    except KeyError:
        return '&%s;' % body
|---|
| 182 | |
|---|
def unquotehtml(s):
    """Convert a HTML quoted string into normal string (ISO-8859-1).

    Works with &#XX; and with &gt; etc.

    Every '&...;' sequence in s is passed to convertentity and replaced by
    its result. The part between '&' and ';' may not contain '&', ';' or
    whitespace, so a bare ampersand (as in 'AT&T &lt;') cannot start a
    match that swallows the real entity following it.
    """
    return re.sub(r'&(#?)([^&;\s]+);', convertentity, s)
|---|