| 1 | from xml.dom import minidom |
|---|
| 2 | from urlparse import urlparse |
|---|
| 3 | from collective.imstransport.IMS_exceptions import ManifestError |
|---|
| 4 | from configbb import LOM_BB_namespace, XML_namespace, EMBEDDED_STRING |
|---|
| 5 | from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
|---|
| 6 | from collective.imstransport.utilities.interfaces import IIMSTransportUtility |
|---|
| 7 | |
|---|
class BBReader(object):
    """ Reader for "x-bb" style IMS content packages.

    Provides helpers to parse the package manifest and data files with
    ``xml.dom.minidom`` and to rewrite links found in (BeautifulSoup-like)
    HTML documents so that they point at the imported content.
    """

    # ------------------------------------------------------------------
    # XML parsing
    # ------------------------------------------------------------------

    def parseManifest(self, manifest):
        """ Parse the manifest XML string and return a minidom Document. """
        return self.parseDataFile(manifest)

    def parseDataFile(self, dataxml):
        """ Parse an XML string and return a minidom Document.

        Malformed XML raises whatever ``minidom.parseString`` raises.
        NOTE(review): minidom is not hardened against maliciously
        constructed XML; only feed it packages from trusted sources.
        """
        return minidom.parseString(dataxml)

    def _firstElement(self, node, tagname):
        """ Return the first descendant of node named tagname, or None. """
        found = node.getElementsByTagName(tagname)
        if found:
            return found[0]
        return None

    def _organizationItems(self, manifest):
        """ Return every <item> below the first <organization> element.

        Only the first <organizations> element and its first
        <organization> are considered; an empty list is returned when
        either one is missing.
        """
        organizations = self._firstElement(manifest, 'organizations')
        if organizations is None:
            return []
        organization = self._firstElement(organizations, 'organization')
        if organization is None:
            return []
        return organization.getElementsByTagName('item')

    def readOrganizations(self, manifest):
        """ Read the organizations for the manifest.

        Returns a dict mapping each item's ``identifierref`` attribute to
        the stripped text of the first <title> found below that item.
        Items without any <title> are left out.
        """
        orgs = {}
        for item in self._organizationItems(manifest):
            title_node = self._firstElement(item, 'title')
            if title_node is not None:
                idref = item.getAttribute('identifierref')
                orgs[idref] = self.getTextValue(title_node)
        return orgs

    def readResources(self, manifest):
        """ Read all resources.

        Returns the <resource> elements below the first <resources>
        element, or an empty list when the manifest has none.
        """
        resources = self._firstElement(manifest, 'resources')
        if resources is None:
            return []
        return resources.getElementsByTagName('resource')

    def getTextValue(self, node):
        """ Return the stripped value of the first text child of node.

        Returns None when node has no direct text child.
        """
        for child in node.childNodes:
            if child.nodeType == child.TEXT_NODE:
                return child.nodeValue.strip()
        return None

    def readResourceAttributes(self, resource):
        """ Return attributes on resource node.

        The result is the tuple (identifier, type, file, title, base),
        where file and title are looked up in LOM_BB_namespace and base
        in XML_namespace. Missing attributes come back as empty strings.
        """
        return (resource.getAttribute('identifier'),
                resource.getAttribute('type'),
                resource.getAttributeNS(LOM_BB_namespace, 'file'),
                resource.getAttributeNS(LOM_BB_namespace, 'title'),
                resource.getAttributeNS(XML_namespace, 'base'))

    def readFiles(self, resource, bbase):
        """ Return the href of every <file> element below resource.

        When bbase is truthy each href is prefixed with ``bbase + '/'``.
        """
        hrefs = [node.getAttribute('href')
                 for node in resource.getElementsByTagName('file')]
        if bbase:
            return ['%s/%s' % (bbase, href) for href in hrefs]
        return hrefs

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------

    def readMetadata(self, content):
        """ Read metadata from a parsed data file and return it as a dict. """
        md = {}
        self.readContentMetadata(content, md)
        return md

    def readContentMetadata(self, metadata, md):
        """ Read the metadata from a content file into the dict md.

        Looks at the first <CONTENT> element and fills in, when present:
        'title', 'text', 'creation_date', 'bbtype' ('Folder' or 'Link')
        and 'remoteUrl'. md is modified in place; nothing is returned.
        """
        content_node = self._firstElement(metadata, 'CONTENT')
        if content_node is None:
            return

        title_node = self._firstElement(content_node, 'TITLE')
        if title_node is not None:
            md['title'] = title_node.getAttribute('value')

        body_node = self._firstElement(content_node, 'BODY')
        if body_node is not None:
            text_node = self._firstElement(body_node, 'TEXT')
            if text_node is not None:
                md['text'] = self.getTextValue(text_node)

        dates_node = self._firstElement(content_node, 'DATES')
        if dates_node is not None:
            created_node = self._firstElement(dates_node, 'CREATED')
            if created_node is not None:
                md['creation_date'] = created_node.getAttribute('value')

        flags_node = self._firstElement(content_node, 'FLAGS')
        if flags_node is not None:
            isfolder_node = self._firstElement(flags_node, 'ISFOLDER')
            if (isfolder_node is not None
                    and isfolder_node.getAttribute('value') == 'true'):
                md['bbtype'] = 'Folder'

        handler_node = self._firstElement(content_node, 'CONTENTHANDLER')
        if (handler_node is not None and
                handler_node.getAttribute('value') == 'resource/x-bb-externallink'):
            url_node = self._firstElement(content_node, 'URL')
            if url_node is not None:
                url = url_node.getAttribute('value')
                # An external link without a URL is not reported as a Link.
                if url:
                    md['bbtype'] = 'Link'
                    md['remoteUrl'] = url

    def readTocItem(self, manifest, resid):
        """ Read the toc page and find child nodes.

        Returns the ``identifierref`` of every direct <item> child of the
        organization item(s) whose own ``identifierref`` equals resid.
        """
        tocitems = []
        for item in self._organizationItems(manifest):
            if item.getAttribute('identifierref') != resid:
                continue
            for child in item.childNodes:
                # Text and <title> children are skipped by the name check.
                if child.nodeName == 'item':
                    tocitems.append(child.getAttribute('identifierref'))
        return tocitems

    # ------------------------------------------------------------------
    # Document links
    # ------------------------------------------------------------------

    def readEmbeddedTags(self, soup):
        """ Read embedded tags from a text file.

        Returns the single href containing 'embedded' that the document
        links to, or None when there is no such href or when the document
        refers to more than one distinct embedded target.
        """
        prevlink = None
        for link in self.getDocumentHrefLinks(soup):
            href = link['href']
            if 'embedded' in href:
                if prevlink and prevlink != href:
                    return None
                prevlink = href
        for link in self.getDocumentSrcLinks(soup):
            src = link['src']
            # NOTE(review): src values are only checked for a conflict with
            # the href found above; they are never recorded themselves, so a
            # document with embedded src links only yields None. Behaviour
            # kept as found - confirm whether that is intended.
            if 'embedded' in src and prevlink and prevlink != src:
                return None
        return prevlink

    def createTocPage(self, entries):
        """ Create a table of links.

        entries is an iterable of (href, title) pairs; one table row is
        produced per entry.
        NOTE(review): values are inserted verbatim, without HTML escaping.
        """
        rows = ["<tr><td><a href='%s'>%s</a></td></tr>" % (z[0], z[1])
                for z in entries]
        return '<table>' + ''.join(rows) + '</table>'

    def runDocumentFilters(self, utils, soup, vars, base):
        """ Run a filter over the links.

        Every local href and src attribute in soup is rewritten in place
        through filterDocumentLink; the prettified document is returned.
        """
        for link in self.getDocumentHrefLinks(soup):
            link['href'] = self.filterDocumentLink(link['href'], utils, vars, base)
        for link in self.getDocumentSrcLinks(soup):
            link['src'] = self.filterDocumentLink(link['src'], utils, vars, base)
        return soup.prettify()

    def _getLocalLinks(self, soup, attr):
        """ Return the tags in soup whose attr value is a local link.

        A link counts as local when its URL has no network location or
        when the network location contains 'localhost'.
        """
        links = []
        # findAll(<attr>=True) only yields tags that carry the attribute.
        for tag in soup.findAll(**{attr: True}):
            # Parse the attribute *value*; parsing the attribute name made
            # every link look local.
            netloc = urlparse(tag[attr])[1]
            if not netloc or 'localhost' in netloc:
                links.append(tag)
        return links

    def getDocumentHrefLinks(self, soup):
        """ Return the tags of soup that carry a local href attribute. """
        return self._getLocalLinks(soup, 'href')

    def getDocumentSrcLinks(self, soup):
        """ Return the tags of soup that carry a local src attribute. """
        return self._getLocalLinks(soup, 'src')

    def _convertBBVariables(self, link, vars, base):
        """ Convert BB variables to their counterparts.

        vars is an iterable of (variable, replacement) pairs. When base
        is truthy each replacement becomes ``base/replacement/``.
        """
        lnk = link
        for var in vars:
            if base:
                replace = '%s/%s/' % (base, var[1])
            else:
                replace = var[1]
            # Chain on lnk so that every variable is substituted, not just
            # the last one.
            lnk = lnk.replace(var[0], replace)
        return lnk

    def _convertURLEntities(self, link):
        """ Decode percent-encoded (%xx) sequences in link. """
        try:
            from urllib import unquote  # Python 2
        except ImportError:
            from urllib.parse import unquote  # Python 3
        return unquote(link)

    def _convertToNormalizedLink(self, link, utils):
        """ Normalize the link so it can be imported without errors.

        Only the path component of link is kept; its last segment is
        passed through utils.normalizeString.
        """
        segments = urlparse(link)[2].split('/')
        segments[-1] = utils.normalizeString(segments[-1])
        return '/'.join(segments)

    def filterDocumentLink(self, link, utils, vars, base):
        """ Rewrite a relative link so it resolves after import.

        Links that carry a scheme, or that have no path, are returned
        unchanged.
        """
        lnk = link
        url = urlparse(lnk)
        if url[2] and not url[0]:
            lnk = self._convertBBVariables(lnk, vars, base)
            lnk = self._convertURLEntities(lnk)
            lnk = self._convertToNormalizedLink(lnk, utils)
        return lnk

    # ------------------------------------------------------------------
    # Text filters
    # ------------------------------------------------------------------

    def runFilters(self, text, filters, **kw):
        """ Apply the named filters to text, in order, and return the result.

        Known filter names are 'embed' (uses the optional ``base`` keyword)
        and 'reflinks'; any other name is ignored.
        """
        rettext = text
        for name in filters:
            if 'embed' == name:
                rettext = self.replaceEmbedVariables(rettext, kw.get('base'))
            elif 'reflinks' == name:
                rettext = self.rewriteReferenceLinks(rettext)
        return rettext

    def replaceEmbedVariables(self, text, base):
        """ Remove the proprietary embed variables and replace with proper path.

        Every occurrence of EMBEDDED_STRING becomes ``base/embed/``, or the
        relative ``embed/`` when base is empty or None.
        """
        if base:
            rpath = '%s/embed/' % base
        else:
            # Without a base the path stays relative instead of failing on
            # an unbound variable.
            rpath = 'embed/'
        return text.replace(EMBEDDED_STRING, rpath)

    def rewriteReferenceLinks(self, text):
        """ Rewrite all reference links.

        Not implemented yet: the text is handed back unchanged so that
        runFilters keeps working when the 'reflinks' filter is requested.
        """
        return text

    def removeUrlEntities(self, href):
        """ Remove the URL entities from the string.

        Not implemented yet; always returns None.
        """
        return None
|---|
| 249 | |
|---|
| 250 | |
|---|
| 251 | |
|---|
| 252 | |
|---|
| 253 | |
|---|
| 254 | |
|---|
| 255 | |
|---|
| 256 | |
|---|
| 257 | |
|---|
| 258 | |
|---|
| 259 | |
|---|
| 260 | |
|---|
| 261 | |
|---|
| 262 | |
|---|
| 263 | |
|---|
| 264 | |
|---|
| 265 | |
|---|
| 266 | |
|---|
| 267 | |
|---|
| 268 | |
|---|
| 269 | |
|---|
| 270 | |
|---|
| 271 | |
|---|
| 272 | |
|---|
| 273 | |
|---|