source: collective.imstransport/trunk/collective/imstransport/utilities/bb/bbreader.py @ 378

Revision 378, 9.7 KB checked in by jon, 4 years ago (diff)

Changing Blackboard import to accommodate Tufts Blackboard packages

Line 
1from xml.dom import minidom
2from urlparse import urlparse
3from collective.imstransport.IMS_exceptions import ManifestError
4from configbb import LOM_BB_namespace, XML_namespace, EMBEDDED_STRING
5from Products.PageTemplates.PageTemplateFile import PageTemplateFile
6from collective.imstransport.utilities.interfaces import IIMSTransportUtility
7
class BBReader(object):
    """ Reads manifests, data files and HTML documents of a BB package.

    The XML helpers operate on ``xml.dom.minidom`` nodes.  The document
    helpers operate on a parsed HTML "soup" object that offers
    ``findAll(attr=True)`` and ``prettify()`` and whose tags support
    item access and ``get`` (presumably BeautifulSoup -- confirm against
    the callers).
    """

    def parseManifest(self, manifest):
        """ Parse the manifest XML string and return its DOM document """
        return self.parseDataFile(manifest)

    def parseDataFile(self, dataxml):
        """ Parse an XML string and return the minidom document """
        return minidom.parseString(dataxml)

    def _getOrganizationItems(self, manifest):
        """ Return all 'item' nodes below the first 'organization' node.

        Only the first 'organizations' element and its first
        'organization' element are consulted.  An empty list is returned
        when either is missing.
        """
        organizations = manifest.getElementsByTagName('organizations')
        if not organizations:
            return []
        organization_nodes = organizations[0].getElementsByTagName('organization')
        if not organization_nodes:
            return []
        return organization_nodes[0].getElementsByTagName('item')

    def readOrganizations(self, manifest):
        """ Read the organizations for the manifest.

        Returns a dict mapping each item's 'identifierref' attribute to
        the text of the first 'title' element found below that item.
        Items without any 'title' element are left out.
        """
        orgs = {}
        for item in self._getOrganizationItems(manifest):
            titlenodes = item.getElementsByTagName('title')
            if titlenodes:
                idref = item.getAttribute('identifierref')
                orgs[idref] = self.getTextValue(titlenodes[0])
        return orgs

    def readResources(self, manifest):
        """ Return the 'resource' nodes of the first 'resources' element.

        Returns an empty list when the manifest has no 'resources'
        element.
        """
        resources = manifest.getElementsByTagName('resources')
        if not resources:
            return []
        return resources[0].getElementsByTagName('resource')

    def getTextValue(self, node):
        """ Return the stripped value of the first text child of a node.

        Returns None when the node has no direct text child.
        """
        for child in node.childNodes:
            if child.nodeType == child.TEXT_NODE:
                return child.nodeValue.strip()
        return None

    def readResourceAttributes(self, resource):
        """ Return attributes on resource node.

        The result is the tuple (identifier, type, file, title, base),
        where 'file' and 'title' are looked up in LOM_BB_namespace and
        'base' in XML_namespace.
        """
        return (resource.getAttribute('identifier'),
                resource.getAttribute('type'),
                resource.getAttributeNS(LOM_BB_namespace, 'file'),
                resource.getAttributeNS(LOM_BB_namespace, 'title'),
                resource.getAttributeNS(XML_namespace, 'base'))

    def readFiles(self, resource, bbase):
        """ Return the 'href' of every 'file' node below the resource.

        When bbase is non-empty each href is prefixed with 'bbase/'.
        """
        files = []
        for fln in resource.getElementsByTagName('file'):
            href = fln.getAttribute('href')
            if bbase:
                files.append('%s/%s' % (bbase, href))
            else:
                files.append(href)
        return files

    def readMetadata(self, content):
        """ Read metadata from data files and return it as a new dict """
        md = {}
        self.readContentMetadata(content, md)
        return md

    def readContentMetadata(self, metadata, md):
        """ Read the metadata from a content file into the dict md.

        Only the first 'CONTENT' element is inspected.  Keys that may be
        set: 'title', 'text', 'creation_date', 'bbtype' ('Folder' or
        'Link') and 'remoteUrl'.  md is modified in place.
        """
        content_nodes = metadata.getElementsByTagName('CONTENT')
        if not content_nodes:
            return
        content_node = content_nodes[0]

        title_nodes = content_node.getElementsByTagName('TITLE')
        if title_nodes:
            md['title'] = title_nodes[0].getAttribute('value')

        body_nodes = content_node.getElementsByTagName('BODY')
        if body_nodes:
            text_nodes = body_nodes[0].getElementsByTagName('TEXT')
            if text_nodes:
                md['text'] = self.getTextValue(text_nodes[0])

        date_nodes = content_node.getElementsByTagName('DATES')
        if date_nodes:
            created_nodes = date_nodes[0].getElementsByTagName('CREATED')
            if created_nodes:
                md['creation_date'] = created_nodes[0].getAttribute('value')

        flag_nodes = content_node.getElementsByTagName('FLAGS')
        if flag_nodes:
            isfolder_nodes = flag_nodes[0].getElementsByTagName('ISFOLDER')
            if isfolder_nodes:
                if isfolder_nodes[0].getAttribute('value') == 'true':
                    md['bbtype'] = 'Folder'

        # The external link handler is checked last, so a 'Link' type
        # overrides a 'Folder' type set by the flags above.
        handler_nodes = content_node.getElementsByTagName('CONTENTHANDLER')
        if handler_nodes:
            handler = handler_nodes[0].getAttribute('value')
            if handler == 'resource/x-bb-externallink':
                url_nodes = content_node.getElementsByTagName('URL')
                if url_nodes:
                    url = url_nodes[0].getAttribute('value')
                    if url:
                        md['bbtype'] = 'Link'
                        md['remoteUrl'] = url

    def readTocItem(self, manifest, resid):
        """ Read the toc page and find child nodes.

        Returns the 'identifierref' of every direct child 'item' of each
        organization item whose own 'identifierref' equals resid.
        """
        tocitems = []
        for item in self._getOrganizationItems(manifest):
            if item.getAttribute('identifierref') != resid:
                continue
            for child in item.childNodes:
                if child.nodeName == 'item':
                    tocitems.append(child.getAttribute('identifierref'))
        return tocitems

    def readEmbeddedTags(self, soup):
        """ Read embedded tags from a text file.

        Looks at the local href links and then the local src links whose
        value contains 'embedded'.  Returns that value when all of them
        agree, and None when there are none or when two of them differ.
        """
        prevlink = None
        getters = (('href', self.getDocumentHrefLinks),
                   ('src', self.getDocumentSrcLinks))
        for attr, getter in getters:
            for link in getter(soup):
                target = link[attr]
                if 'embedded' not in target:
                    continue
                if prevlink and prevlink != target:
                    return None
                # Remember the link for src attributes too, so that a
                # document with only embedded src links is reported.
                prevlink = target
        return prevlink

    def createTocPage(self, entries):
        """ Create a table of links.

        entries is a sequence of (href, title) pairs.  The values are
        inserted into the markup as they are, without HTML escaping.
        """
        rows = ["<tr><td><a href='%s'>%s</a></td></tr>" % (z[0], z[1])
                for z in entries]
        return '<table>' + ''.join(rows) + '</table>'

    def runDocumentFilters(self, utils, soup, vars, base):
        """ Run a filter over the links.

        Rewrites the local href and src attributes of soup in place with
        filterDocumentLink and returns soup.prettify().
        """
        for link in self.getDocumentHrefLinks(soup):
            link['href'] = self.filterDocumentLink(link['href'], utils, vars, base)
        for link in self.getDocumentSrcLinks(soup):
            link['src'] = self.filterDocumentLink(link['src'], utils, vars, base)
        return soup.prettify()

    def _getLocalLinks(self, soup, attr):
        """ Return the tags whose attr value points at a local target.

        A value counts as local when its parsed URL has no network
        location or when the network location contains 'localhost'.
        """
        links = []
        for tag in soup.findAll(**{attr: True}):
            value = tag.get(attr)
            if value is None:
                continue
            # Parse the attribute value itself, not the attribute name.
            netloc = urlparse(value)[1]
            if not netloc or 'localhost' in netloc:
                links.append(tag)
        return links

    def getDocumentHrefLinks(self, soup):
        """ Return the tags of soup that carry a local 'href' link """
        return self._getLocalLinks(soup, 'href')

    def getDocumentSrcLinks(self, soup):
        """ Return the tags of soup that carry a local 'src' link """
        return self._getLocalLinks(soup, 'src')

    def _convertBBVariables(self, link, vars, base):
        """ Convert BB variables to their counterparts.

        vars is a sequence of (variable, replacement) pairs.  With a
        non-empty base the replacement becomes 'base/replacement/'.
        Every pair is applied in turn to the result of the previous one.
        """
        lnk = link
        for var in vars:
            if base:
                replace = '%s/%s/' % (base, var[1])
            else:
                replace = var[1]
            # Accumulate on lnk so earlier substitutions are kept.
            lnk = lnk.replace(var[0], replace)
        return lnk

    def _convertURLEntities(self, link):
        """ Decode the percent-escapes (e.g. %20) contained in a link """
        try:
            from urllib import unquote  # Python 2
        except ImportError:
            from urllib.parse import unquote  # Python 3
        return unquote(link)

    def _convertToNormalizedLink(self, link, utils):
        """ Normalize the link so it can be imported without errors.

        Only the path component of the link is kept, and its last
        segment is passed through utils.normalizeString.
        """
        segments = urlparse(link)[2].split('/')
        segments[-1] = utils.normalizeString(segments[-1])
        return '/'.join(segments)

    def filterDocumentLink(self, link, utils, vars, base):
        """ Rewrite a relative link so that it matches the imported content.

        Links that have a scheme, or that have no path, are returned
        untouched.  All other links get their BB variables replaced,
        their percent-escapes decoded and their last path segment
        normalized.
        """
        lnk = link
        url = urlparse(lnk)
        if url[2] and not url[0]:
            lnk = self._convertBBVariables(lnk, vars, base)
            lnk = self._convertURLEntities(lnk)
            lnk = self._convertToNormalizedLink(lnk, utils)
        return lnk

    def runFilters(self, text, filters, **kw):
        """ Apply the named filters to text, in order, and return the result.

        Known filter names are 'embed' (uses the optional 'base' keyword
        argument) and 'reflinks'.  Unknown names are ignored.
        """
        rettext = text
        for name in filters:
            if 'embed' == name:
                rettext = self.replaceEmbedVariables(rettext, kw.get('base'))
            elif 'reflinks' == name:
                rettext = self.rewriteReferenceLinks(rettext)
        return rettext

    def replaceEmbedVariables(self, text, base):
        """ Remove the proprietary embed variables and replace with proper path.

        Every EMBEDDED_STRING in text becomes 'base/embed/'.  Without a
        base the relative path 'embed/' is used, so the call no longer
        fails on an unassigned variable.
        NOTE(review): the relative fallback mirrors _convertBBVariables;
        confirm it is the wanted behaviour for packages without a base.
        """
        if base:
            rpath = '%s/embed/' % base
        else:
            rpath = 'embed/'
        return text.replace(EMBEDDED_STRING, rpath)

    def rewriteReferenceLinks(self, text):
        """ Rewrite all reference links.

        Not implemented yet: the text is returned unchanged so that
        runFilters keeps working when the 'reflinks' filter is requested.
        """
        return text

    def removeUrlEntities(self, href):
        """ Remove the URL entities from the string.

        Not implemented yet: always returns None.
        """
        return None
249       
250       
251
252
253
254       
255
256
257
258
259       
260       
261
262       
263               
264               
265           
266
267           
268           
269               
270
271
272
273       
Note: See TracBrowser for help on using the repository browser.