source: collective.imstransport/trunk/collective/imstransport/utilities/bb/imsbbreader.py @ 378

Revision 378, 8.3 KB checked in by jon, 4 years ago (diff)

Changing Blackboard import to accommodate Tufts Blackboard packages

Line 
1from collective.imstransport.utilities.imsinterchange import IMSReader
2from collective.imstransport.utilities.packagingio import ZipfileReader
3from collective.imstransport.utilities.bb.bbreader import BBReader
4from zope.component import getUtility
5from collective.imstransport.utilities.interfaces import IIMSObjectCreator
6from xml.parsers.expat import ExpatError
7from collective.imstransport.IMS_exceptions import ManifestError
8from BeautifulSoup import BeautifulSoup
9from Products.CMFCore.interfaces import ISiteRoot
10import htmlentitydefs
11from configbb import EMBEDDED_STRING
12import re
13
class IMSBBReader(IMSReader):
    """ Create objects from the IMS manifest of a Blackboard package. """

    def readPackage(self, file, context):
        """ Read the manifest in file and create the objects it describes.

        The archive is opened with ZipfileReader and its manifest parsed
        with BBReader.  One metadata dictionary is collected per resource
        and per file attached to a resource; the collection is then handed,
        together with context and the archive reader, to the
        IIMSObjectCreator utility.

        Returns None once the objects have been created, or the tuple
        (False, message) when the ZipfileReader instance is falsy.

        Raises ManifestError when the archive holds no manifest or when the
        manifest parser raises ExpatError.
        """
        source = ZipfileReader(file)
        if not source:
            return False, 'Internal error. No source object specified'
        bbreader = BBReader()
        manifest = source.readManifest()
        if not manifest:
            raise ManifestError('Could not locate manifest file "imsmanifest.xml" in the zip archive.')
        try:
            doc = bbreader.parseManifest(manifest)
        except ExpatError:
            # sys.exc_info() rather than binding the exception in the except
            # clause keeps this statement valid on every Python version.
            import sys
            raise ManifestError(str(sys.exc_info()[1]))

        objDict = {}
        tocpages = []
        orgs = bbreader.readOrganizations(doc)
        for resource in bbreader.readResources(doc):
            # The fourth attribute (the resource title) is not used here.
            resid, restype, bbfile, _bbtitle, bbase = bbreader.readResourceAttributes(resource)
            doctext = filetoc = ''
            metadata = {}
            if restype == 'resource/x-bb-document':
                # Read the data file
                if bbfile:
                    resnode = bbreader.parseDataFile(source.readFile(bbfile))
                    metadata = bbreader.readMetadata(resnode)
                if metadata.get('text'):
                    ptext = unquotehtml(self._toUtf8(metadata['text']))
                    utils = getUtility(ISiteRoot).plone_utils
                    soup = BeautifulSoup(ptext)
                    doctext = bbreader.runDocumentFilters(utils, soup, [(EMBEDDED_STRING, 'embedded'),], bbase)
                # Handle files
                entries = self._collectFileEntries(bbreader, source, resource, resid, bbase, doctext, objDict)
                if entries:
                    filetoc = bbreader.createTocPage(entries)

            if metadata.get('bbtype') == 'Link':
                # Handle links
                objDict[resid] = metadata
                linkid = resid + '.link'
                if resid in orgs:
                    title = orgs[resid]
                else:
                    title = linkid
                self.applyCoreMetadata(objDict[resid], linkid, '', True, metadata['bbtype'], title)
            elif restype in ['resource/x-bb-document', 'course/x-bb-coursetoc']:
                # Handle normal bb-documents
                objDict[resid] = metadata
                excludeFromNav = True
                docid = resid + '.html'
                if resid in orgs:
                    title = orgs[resid]
                else:
                    title = docid
                isFolder = metadata.get('bbtype') == 'Folder'
                isToc = restype == 'course/x-bb-coursetoc'
                # Folders and table of contents objects get a generated
                # listing once every resource has been read.
                if isFolder or isToc:
                    tocpages.append(resid)
                if isToc and resid in orgs:
                    excludeFromNav = False
                    orgstitle = orgs[resid].split('.')
                    # Rewrite label tag: put a space in front of the capital
                    # letters the pattern selects in the second component.
                    if len(orgstitle) > 1 and orgstitle[-1] == 'label':
                        title = re.sub('(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))', ' \\1', orgstitle[1])
                text = doctext
                # filetoc stays the empty byte string when no file was
                # attached; only a unicode listing is appended.
                if isinstance(filetoc, type(u'')):
                    text += filetoc.encode('utf-8')
                self.applyCoreMetadata(objDict[resid], docid, '', excludeFromNav, 'Document', title, text=text)

        # Build table of contents pages
        self._buildTocPages(bbreader, doc, tocpages, objDict)
        objcreator = getUtility(IIMSObjectCreator)
        objcreator.createObjects(objDict, context, source)

    def _collectFileEntries(self, bbreader, source, resource, resid, bbase, doctext, objDict):
        """ Register every file attached to resource in objDict.

        Each file gets its own metadata dictionary under the key
        resid + file name.  Returns the list of (link path, title) tuples,
        one per file, from which the caller builds the file listing.
        """
        entries = []
        for fname in bbreader.readFiles(resource, bbase):
            dhash = resid + fname
            objDict[dhash] = {}
            dfile = fname
            # File is embedded and its last path component starts with '!'
            # (the original author's note calls such links "encrypted"):
            # take the path from the embedded tags of the document text.
            # startswith() is safe for a path that ends in '/'.
            if 'embedded' in fname and fname.split('/')[-1].startswith('!') and doctext:
                embeddedpath = bbreader.readEmbeddedTags(BeautifulSoup(doctext))
                if embeddedpath:
                    dfile = embeddedpath
            dfilepath = self.createPathFromFile(dfile)
            did = self.createIdFromFile(dfile)
            dtype = self.determineType(objDict[dhash], dfile)
            utils = getUtility(ISiteRoot).plone_utils
            did = utils.normalizeString(did)
            if dfilepath:
                linkpath = '%s/%s' % (dfilepath, did)
            else:
                linkpath = did
            # When the target folder of the archive holds exactly one file,
            # that file is used as the binary instead of the manifest name.
            binfile = fname
            folder_files = source.getFolderFiles(dfilepath)
            if len(folder_files) == 1:
                binfile = folder_files[0]
            entries.append((linkpath, did))
            # The normalized id doubles as the title of the file object.
            self.applyCoreMetadata(objDict[dhash], did, dfilepath, True, dtype, did, file=binfile)
        return entries

    def _buildTocPages(self, bbreader, doc, tocpages, objDict):
        """ Prepend a generated listing to the text of every id in tocpages.

        A page whose items are all missing from objDict gets no listing and
        is excluded from navigation instead.
        """
        for tocid in tocpages:
            entries = []
            for titem in bbreader.readTocItem(doc, tocid):
                if titem in objDict:
                    met = objDict[titem]
                    if met['path']:
                        linkpath = '%s/%s' % (met['path'], met['id'])
                    else:
                        linkpath = met['id']
                    entries.append((linkpath, met['title']))
            if not entries:
                objDict[tocid]['excludeFromNav'] = True
                continue
            toctext = bbreader.createTocPage(entries).encode('utf-8')
            gtext = objDict[tocid].get('text')
            if gtext:
                toctext += self._toUtf8(gtext)
            objDict[tocid]['text'] = toctext

    @staticmethod
    def _toUtf8(value):
        """ Return value UTF-8 encoded if it is a unicode string, else as is. """
        # type(u'') names the unicode string type without relying on the
        # Python 2 only builtin.
        if isinstance(value, type(u'')):
            return value.encode('utf-8')
        return value

    def applyCoreMetadata(self, metadata, id, path, excludeFromNav, type, title, file=None, text=None):
        """ Helper function for applying metadata.

        Writes the normalized id, the path, the excludeFromNav flag and the
        type into the metadata dictionary in place.  title is stored only
        when the dictionary has no non-empty title yet; file and text are
        stored only when they are truthy.
        """
        utils = getUtility(ISiteRoot).plone_utils
        metadata['id'] = utils.normalizeString(id)
        metadata['path'] = path
        metadata['excludeFromNav'] = excludeFromNav
        metadata['type'] = type
        if not metadata.get('title'):
            metadata['title'] = title
        if file:
            metadata['file'] = file
        if text:
            metadata['text'] = text
170
# Matches "&name;", "&#NNN;" and "&#xHH;".  The entity body is limited to
# ASCII letters and digits so that a bare "&" in running text cannot match
# everything up to the next ";" and thereby hide a real entity behind it.
_ENTITY_RE = re.compile(r'&(#?)([A-Za-z0-9]+);')


def convertentity(m):
    """Convert a HTML entity into normal string (ISO-8859-1).

    m is a match object whose group(1) is '#' for a numeric character
    reference (empty otherwise) and whose group(2) is the entity body.
    Decimal and hexadecimal references up to 255 become the matching
    character; named entities are looked up in htmlentitydefs.entitydefs.
    Anything that cannot be converted is returned as it was written.
    """
    body = m.group(2)
    if m.group(1) == '#':
        try:
            if body[:1] in ('x', 'X'):
                code = int(body[1:], 16)
            else:
                code = int(body)
        except ValueError:
            return '&#%s;' % body
        # Only code points that fit ISO-8859-1 are converted; larger ones
        # stay as references.
        if 0 <= code <= 255:
            return chr(code)
        return '&#%s;' % body
    try:
        return htmlentitydefs.entitydefs[body]
    except KeyError:
        return '&%s;' % body


def unquotehtml(s):
    """Convert a HTML quoted string into normal string (ISO-8859-1).

    Works with &#XX; and &#xHH; and with &nbsp; &gt; etc.  Text that is not
    a convertible entity, including a bare "&", is left unchanged.
    """
    return _ENTITY_RE.sub(convertentity, s)
Note: See TracBrowser for help on using the repository browser.