| 1 | | # from zope.interface import implements |
| 2 | | # from collective.imstransport.utilities.interfaces import IIMSManifestReader, IIMSTransportUtility, IIMSObjectCreator |
| 3 | | from collective.imstransport import IMSTransportMessageFactory as _ |
| 4 | | # from zipfile import ZipFile |
| 5 | | # from zope.component import getUtility |
| 6 | | # from Products.CMFCore.interfaces import ISiteRoot |
| 7 | | # from elementtree import ElementTree |
| 8 | | # import re |
| 9 | | from zope.component import getUtility |
| 10 | | from collective.imstransport.utilities.imsinterchange import IMSInterchangeReader |
| | 1 | from xml.dom import minidom |
| | 2 | from collective.imstransport.IMS_exceptions import ManifestError |
| | 3 | from configbb import LOM_BB_namespace, XML_namespace |
| | 4 | |
class BBReader(object):
    """ Reader for the XML documents of a Blackboard package.

    Parses the manifest and the content data files with ``xml.dom.minidom``
    and extracts organizations, resources, files and content metadata from
    the resulting DOM nodes. The reader keeps no state between calls.
    """

    def parseManifest(self, manifest):
        """ Parse the manifest XML string and return the minidom document. """
        return self.parseDataFile(manifest)

    def parseDataFile(self, dataxml):
        """ Parse the XML string of a data file and return the minidom document. """
        return minidom.parseString(dataxml)

    def _organizationItems(self, manifest):
        """ Return every 'item' element below the first 'organization'.

        Only the first 'organizations' element and, inside it, the first
        'organization' element are considered. The result contains items at
        any nesting depth, in document order; it is empty when either
        element is missing.
        """
        organizations = manifest.getElementsByTagName('organizations')
        if not organizations:
            return []
        organization_nodes = organizations[0].getElementsByTagName('organization')
        if not organization_nodes:
            return []
        return organization_nodes[0].getElementsByTagName('item')

    def _firstElement(self, node, tagname):
        """ Return the first descendant of node named tagname, or None. """
        found = node.getElementsByTagName(tagname)
        if found:
            return found[0]
        return None

    def readOrganizations(self, manifest):
        """ Read the organizations for the manifest.

        Returns a dictionary mapping the 'identifierref' of each item to the
        text of the first 'title' element found below that item. Items
        without any 'title' element below them are left out.
        """
        orgs = {}
        for item in self._organizationItems(manifest):
            title_node = self._firstElement(item, 'title')
            if title_node is not None:
                orgs[item.getAttribute('identifierref')] = self.getTextValue(title_node)
        return orgs

    def readResources(self, manifest):
        """ Read all resources.

        Returns the 'resource' elements below the first 'resources' element,
        or an empty list when the manifest has no 'resources' element.
        """
        resources = manifest.getElementsByTagName('resources')
        if not resources:
            return []
        return resources[0].getElementsByTagName('resource')

    def getTextValue(self, node):
        """ Return the stripped value of the first text child of node.

        Returns None when node has no text child at all.
        """
        for child in node.childNodes:
            if child.nodeType == child.TEXT_NODE:
                return child.nodeValue.strip()
        return None

    def readResourceAttributes(self, resource):
        """ Return attributes on resource node.

        The result is the tuple (identifier, type, file, title, base), where
        file and title are looked up in LOM_BB_namespace and base in
        XML_namespace.
        """
        return (resource.getAttribute('identifier'),
                resource.getAttribute('type'),
                resource.getAttributeNS(LOM_BB_namespace, 'file'),
                resource.getAttributeNS(LOM_BB_namespace, 'title'),
                resource.getAttributeNS(XML_namespace, 'base'))

    def readFiles(self, resource):
        """ Return the 'href' of every 'file' element below resource. """
        return [node.getAttribute('href')
                for node in resource.getElementsByTagName('file')]

    def readMetadata(self, content):
        """ Read metadata from data files and return it as a dictionary. """
        md = {}
        self.readContentMetadata(content, md)
        return md

    def readContentMetadata(self, metadata, md):
        """ Read the metadata from a content file into the dictionary md.

        Looks at the first 'CONTENT' element only and sets, when the
        corresponding elements are present:

          title         -- 'value' attribute of TITLE
          text          -- text of BODY/TEXT
          creation_date -- 'value' attribute of DATES/CREATED
          type          -- 'Folder', only if FLAGS/ISFOLDER has value 'true'
        """
        content_node = self._firstElement(metadata, 'CONTENT')
        if content_node is None:
            return

        title_node = self._firstElement(content_node, 'TITLE')
        if title_node is not None:
            md['title'] = title_node.getAttribute('value')

        body_node = self._firstElement(content_node, 'BODY')
        if body_node is not None:
            text_node = self._firstElement(body_node, 'TEXT')
            if text_node is not None:
                md['text'] = self.getTextValue(text_node)

        dates_node = self._firstElement(content_node, 'DATES')
        if dates_node is not None:
            created_node = self._firstElement(dates_node, 'CREATED')
            if created_node is not None:
                md['creation_date'] = created_node.getAttribute('value')

        flags_node = self._firstElement(content_node, 'FLAGS')
        if flags_node is not None:
            isfolder_node = self._firstElement(flags_node, 'ISFOLDER')
            if isfolder_node is not None:
                if isfolder_node.getAttribute('value') == 'true':
                    md['type'] = 'Folder'

    def readTocItem(self, manifest, resid):
        """ Read the toc page and find child nodes.

        Returns the 'identifierref' of every item nested below each
        organization item whose own 'identifierref' equals resid. Nested
        items of any depth are included, not only direct children.
        """
        tocitems = []
        for item in self._organizationItems(manifest):
            if item.getAttribute('identifierref') == resid:
                for child in item.getElementsByTagName('item'):
                    tocitems.append(child.getAttribute('identifierref'))
        return tocitems
| | 114 | |
| | 115 | |
| | 116 | |
| | 117 | |
| | 118 | |
| | 119 | |
| | 120 | |
| | 121 | |
| | 122 | |
| 29 | | # Read in resources section |
| 30 | | self.tree = ElementTree.XML(imsdoc) |
| 31 | | |
| 32 | | # Enter resource data into a dictionary |
| 33 | | resource_dictionary = self.readResources({}, self.tree) |
| 34 | | |
| 35 | | self.zf.close() |
| 36 | | |
| 37 | | return resource_dictionary |
| 38 | | |
def readResources(self, resDict, tree):
    """ Feed every 'resources/resource' node of tree to readFiles.

    readFiles is handed resDict together with each node; the same resDict
    object is returned afterwards.
    """
    for node in tree.findall('resources/resource'):
        self.readFiles(resDict, node)
    return resDict
| 47 | | |
def readFiles(self, resDict, resourceNode):
    """ Record one manifest resource, and every file it lists, in resDict.

    resDict      -- dictionary that is filled in place
    resourceNode -- ElementTree 'resource' element of the manifest

    One entry of type 'Document' is stored under the resource identifier.
    Its text is, in order of preference:

      * a table of contents of the resource's 'file' children (each file
        additionally gets its own entry keyed by its href),
      * a table of contents of the child items of the organization item
        referring to this resource, when there is no dat file text,
      * the text read from the dat file,
      * the empty string.

    Reads self.BBNS and self.tree and calls self.readContentFile,
    self.determineFileMimetype and self.TocPage. Returns nothing.
    """
    # Imported here because the module-level "import re" is commented out.
    import re

    res_title = resourceNode.get('{%s}title' %(self.BBNS))
    # A resource without a title attribute gives None; it is stored as is
    # instead of failing on None.split().
    if res_title:
        # Determining which section it is in (e.g., Assignments, CourseInformation, ExternalLinks...)
        res_title_parts = res_title.split(".")
        if len(res_title_parts) == 4:
            if res_title_parts[0] == 'COURSE_DEFAULT' and res_title_parts[2] == 'CONTENT_LINK' and res_title_parts[3] == 'label':
                # Converts camel case section title (e.g., CourseInformation) to its non-camelcase alternative and sets as title
                res_title = re.sub('(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))', ' \\1', res_title_parts[1]).strip(' ')

    datfile = resourceNode.get('{%s}file' %(self.BBNS))
    fileNodes = resourceNode.findall('file')
    resource_id = resourceNode.get('identifier')

    # Parse dat file
    if datfile:
        datText = self.readContentFile(datfile)
    else:
        datText = ''

    # Children of the first organization item that refers to this resource
    child_nodes = []
    for node in self.tree.findall("organizations//item"):
        if node.get("identifierref") == resource_id:
            child_nodes = node.findall('item')
            break

    if fileNodes:
        # Process resource with file nodes
        files = []
        for fileNode in fileNodes:
            fileid = fileNode.get('href')
            files.append(('%s/%s/view' %(resource_id, fileid), fileid))
            mimetype = self.determineFileMimetype(fileid)
            resDict[fileid] = {'file':fileid, 'path':resource_id, 'title':fileid, 'id':fileid, 'type':mimetype}
        resText = self.TocPage('Table of Contents', files)
    elif child_nodes and not datText:
        # Create table of contents pages.
        items = []
        for child_node in child_nodes:
            title_nodes = child_node.findall('title')
            if title_nodes:
                title = title_nodes[0].text
            else:
                title = ''
            items.append(('%s.html' %(child_node.get('identifierref')), title))
        resText = self.TocPage('Table of Contents', items)
    else:
        # Dat file text when there is any, otherwise the empty string
        resText = datText or ''

    resDict[resource_id] = {'text':resText, 'path':'', 'title':res_title, 'id':'%s.html' %resource_id, 'type':'Document'}
| 118 | | |
| 119 | | |
def TocPage(self, tabletitle, tocitems):
    """ Build the markup of a table of contents and return it as a string.

    tabletitle -- text placed in the single header cell
    tocitems   -- sequence of entries; entry[0] becomes the link target and
                  entry[1] the link text of one table row

    The values are inserted as they are, without any escaping.
    """
    pieces = [
        '<table class="documentTable" style="width: 499px;" border="0" cellpadding="0" cellspacing="0">',
        '<thead>',
        ' <tr>',
        ' <td>%s</td>' %tabletitle,
        ' </tr>',
        '</thead>',
        '<tbody>',
    ]
    for entry in tocitems:
        pieces.append(' <tr tal:define="oddrow repeat/item/odd;" ')
        pieces.append(' tal:attributes="class oddrow)">')
        pieces.append(' <td ><a href="%s"' %entry[0])
        pieces.append(' >%s</a></td>' %entry[1])
        pieces.append(' </tr>')
    pieces.append(' </tbody>')
    pieces.append('</table>')
    return ''.join(pieces)
| 140 | | |
def readContentFile(self, datfile):
    """ Return the text of the first BODY/TEXT element of a dat file.

    The document is obtained from self.zf.read(datfile) (presumably the
    opened package archive -- confirm against the code that sets self.zf)
    and parsed as XML. The empty string is returned when the document has
    no BODY/TEXT element.
    """
    # Read the proprietary blackboard.dat file and parse it in one go
    root = ElementTree.XML(self.zf.read(datfile))

    matches = root.findall("BODY/TEXT")
    if not matches:
        return ''

    # Only the first match carries the text that is used
    return matches[0].text
| 156 | | |
| 157 | | |
def determineFileMimetype(self, name):
    """ Return 'Image' or 'File' for the file called name.

    The mimetype is looked up by extension in the mimetypes_registry of the
    site root utility. 'Image' is returned when its major type is 'image',
    'File' in every other case.
    """
    # NOTE(review): the visible import of ISiteRoot at the top of this file
    # is commented out -- confirm the name is in scope.
    portal = getUtility(ISiteRoot)
    mimetype = portal.mimetypes_registry.lookupExtension(name)

    # The lookup is expected to give None for an extension the registry does
    # not know; such names are treated as plain files instead of failing on
    # None.major().
    if mimetype is not None and mimetype.major() == 'image':
        return 'Image'
    return 'File'
| 168 | | |
| 169 | | |