source: enpraxis.staticsite/trunk/enpraxis/staticsite/utilities/staticsiteutility.py @ 292

Revision 292, 12.1 KB checked in by david, 4 years ago (diff)

broken code....don't be mad at me

Line 
1import os
2import re
3import string
4from BeautifulSoup import BeautifulSoup
5from Globals import BobobaseName
6from os import makedirs as os_makedirs
7from os.path import join as os_join
8from os.path import lexists as os_lexists
9from os.path import split as os_split
10
11from urllib2 import urlopen
12from urllib2 import HTTPError
13
14from OFS.SimpleItem import SimpleItem
15from zope.interface import implements
16from interfaces import IStaticSiteUtility
17from zope.component import getUtility, getMultiAdapter
18
class StaticSiteUtility(SimpleItem):
    """ Deploy a static site

    Fetches the rendered pages and resources of a site over HTTP and
    writes them below a deployment directory on the filesystem. HTML
    pages are parsed with BeautifulSoup so that links pointing back into
    the portal can be rewritten as relative links to the deployed files.
    """

    implements(IStaticSiteUtility)

    # Document actions that get deployed next to each page, mapped to the
    # extension appended to the generated file name.
    _doc_action_suffixes = {
        'skinless': '.html',
        's5': '.html',
        'rss': '',
        'rss_front': '',
    }

    def deploySite(self, context):
        """ Deploy the site

        Reads the deployment settings from the ``staticsite_properties``
        property sheet, deploys the site chrome and the site actions and
        then walks the catalog below ``context``, deploying every object
        whose review state is listed in ``published_states``.
        """
        ssprops = context.portal_url.portal_properties.staticsite_properties
        domain = ssprops.getProperty('domain')
        dpath = self._getDeploymentPath(ssprops.getProperty('deployment_path'))
        pstates = ssprops.getProperty('published_states')
        # Kept on the utility because the helper methods below read them.
        self.base_files = ssprops.base_files
        self.css_images = ssprops.css_images
        self.extra_views = ssprops.extra_views

        self._deploySiteStructure(context, ssprops, domain, dpath)
        self._deploySiteActions(context, dpath)

        brains = context.portal_catalog.searchResults(
            path={'query': '/'.join(context.getPhysicalPath()),
                  'depth': 1},
            review_state=pstates)
        for brain in brains:
            self.deployObject(brain.getURL(), brain, dpath, domain)
            # Same rule as traverse(): only containers are descended into.
            if brain.is_folderish:
                self.traverse(brain, dpath, domain, pstates)

    def traverse(self, brain, dpath, domain, pstates):
        """ Traverse the site.

        Deploys every catalog entry directly below ``brain`` whose review
        state is in ``pstates`` and recurses into folderish entries.
        """
        children = brain.portal_catalog.searchResults(
            path={'query': brain.getPath(), 'depth': 1},
            review_state=pstates)
        for child in children:
            self.deployObject(child.getURL(), child, dpath, domain)
            if child.is_folderish:
                self.traverse(child, dpath, domain, pstates)

    def deployObject(self, url, brain, dpath, domain):
        """ Deploy an object

        Folderish objects become ``<object path>/index.html``, objects of
        type 'Page' become ``<object path>.html`` and anything else is
        written out exactly as downloaded. The RDF of the object is
        deployed afterwards (see _deployRDF). ``domain`` is accepted for
        interface compatibility and is not used here.
        """
        portal_url = brain.portal_url()
        path = self._getObjPath(url, portal_url, dpath)
        is_html = True
        if brain.is_folderish:
            # create dir before processing html
            self._createDirectory(path)
            path = os_join(path, 'index.html')
        elif brain.Type in ['Page']:
            # Only the file name decides whether an extension is needed;
            # a '.htm' somewhere in a parent directory must not count.
            path += self._htmlSuffix(os_split(path)[1])
        else:
            is_html = False

        raw = self._httpget(url)
        if is_html:
            self._deployPage(brain, raw, path, dpath, portal_url)
        else:
            self._writeFile(path, raw)
        # process rdf for the object
        self._deployRDF(brain, url, path)

    def _deployPage(self, brain, raw, path, dpath, portal_url):
        """ Parse a downloaded HTML page, deploy its document actions,
        filter it and write the result to ``path`` """
        soup = BeautifulSoup(raw)
        # The document actions are looked up in the unfiltered markup,
        # before the links are rewritten.
        self._deployDocumentActions(brain, portal_url, path, soup)
        self.runFilters(brain, soup, dpath, portal_url)
        self._writeFile(path, soup.prettify())

    def _htmlSuffix(self, name):
        """ Return the extension to append to ``name`` so that it is an
        HTML file name: nothing if it already contains '.htm', otherwise
        '.html' """
        if '.htm' in name:
            return ''
        return '.html'

    def _deployRDF(self, brain, url, path):
        """ Deploy the RDF for an object

        Downloads ``<url>/rdf`` and stores it as ``<path>-rdf``, but only
        for the content types listed below.
        """
        if brain.Type in ['Page', 'Course', 'Division', 'Folder', 'File', 'Image']:
            rdfraw = self._httpget('%s/rdf' % url)
            self._writeFile('%s-rdf' % path, rdfraw)

    def runFilters(self, brain, soup, dpath, portal_url):
        """ Filter content

        Removes the <base> tag, if the page has one, and rewrites the
        links that point into the portal. ``soup`` is modified in place.
        """
        # Presumably the <base> tag is dropped so that the relative links
        # written by _rewriteHREF resolve against the deployed file.
        base = soup.base
        if base is not None:
            base.extract()
        self._rewriteHREF(brain, soup, dpath, portal_url)

    def _deploySiteStructure(self, context, ssprops, domain, dpath):
        """ Get the base framework and resources needed to build the chrome locally

        Deploys the configured base files and extra views, then the
        stylesheets, CSS images and scripts referenced from the <head> of
        the front page. ``ssprops`` and ``domain`` are accepted for
        interface compatibility and are not used here.
        """
        portal_url = context.portal_url()

        for name in self.base_files:
            # Download files/images that are used sitewide in the chrome
            url = '%s/%s' % (portal_url, name)
            path = self._getObjPath(url, portal_url, dpath)
            self._writeFile(path, self._httpget(url))

        for name in self.extra_views:
            # Download zope3 views and/or additional views that are not
            # included in the site catalog search
            url = '%s/%s' % (portal_url, name)
            path = self._getObjPath(url, portal_url, dpath) + '.html'
            # TODO: the links inside these views are not rewritten yet.
            # runFilters() needs the catalog brain of the page and a view
            # has none, so the markup is stored exactly as downloaded.
            self._writeFile(path, self._httpget(url))

        # get soup of front page
        soup = BeautifulSoup(self._httpget(portal_url))
        head = soup.head
        if head is None:
            # Nothing to inspect without a <head>.
            return
        self._deployStylesheets(head, portal_url, dpath)
        self._deployScripts(head, portal_url, dpath)

    def _deployStylesheets(self, head, portal_url, dpath):
        """ Deploy the stylesheets referenced by <style> tags in ``head``
        and the configured CSS images """
        # Extracts whatever is between the parentheses of the tag content;
        # presumably an '@import url(...)' rule.
        css_url = re.compile(r"""[^(]*\((.*)\)[^)]*$""", re.X)
        # Directory of the last stylesheet deployed. Falls back to the
        # deployment root when the page yields no usable <style> tag.
        css_dir = dpath
        for tag in head.findAll('style'):
            if not tag.contents:
                continue
            # analyze content and extract url to CSS
            match = css_url.match(tag.contents[0])
            if match is None:
                continue
            path = self._deployCompressedResources(
                match.group(1), portal_url, dpath)
            css_dir = os_split(path)[0]

        for img in self.css_images:
            # download css_images; they are stored next to the stylesheet
            url = '%s/%s' % (portal_url, img)
            try:
                raw = self._httpget(url)
            except HTTPError:
                # A configured image that cannot be fetched is skipped.
                continue
            self._writeFile('%s/%s' % (css_dir, img), raw)

    def _deployScripts(self, head, portal_url, dpath):
        """ Deploy the external scripts referenced from ``head`` """
        for tag in head.findAll('script'):
            if tag.has_key('src'):
                self._deployCompressedResources(tag['src'], portal_url, dpath)

    def _deploySiteActions(self, context, dpath):
        """  deploys portal_actions site_actions content

        Every visible site action, except the excluded ones, is fetched
        from ``<portal url>/<last segment of the action url expression>``
        and stored with an '.html' extension.
        """
        excluded = ['plone_setup', 'deploy']
        portal_url = context.portal_url()
        for action in context.portal_actions.site_actions.listActions():
            if action.id in excluded or not action.visible:
                continue
            action_url = action.url_expr.split('/')[-1]
            url = '%s/%s' % (portal_url, action_url)
            path = self._getObjPath(url, portal_url, dpath) + '.html'
            self._writeFile(path, self._httpget(url))

    def _deployDocumentActions(self, brain, portal_url, path, soup):
        """ Deploys the document actions for the site

        For each supported document action that is linked from the page,
        the target is downloaded and stored as
        ``<path>-<action id><suffix>`` (see _doc_action_suffixes).
        """
        doc_actions = brain.portal_actions.document_actions.listActions()
        for doc_action in doc_actions:
            suffix = self._doc_action_suffixes.get(doc_action.id)
            if suffix is None:
                continue
            action_url = self._findDocActionUrl(soup, doc_action.id)
            if not action_url:
                continue
            if portal_url not in action_url:
                action_url = '%s/%s' % (portal_url, action_url)
            # Always derived from the page path itself, so the names of
            # the individual actions do not pile up on each other.
            action_path = '%s-%s%s' % (path, doc_action.id, suffix)
            self._writeFile(action_path, self._httpget(action_url))

    def _findDocActionUrl(self, soup, id):
        """ Parse document soup to find a document action

        Returns the href of the link inside the list item
        ``document-action-<id>`` of the first div whose class is exactly
        'documentActions', or None if there is no such link.
        """
        body = soup.body
        if body is None:
            return None
        for div in body.findAll('div'):
            if not (div.has_key('class') and div['class'] == 'documentActions'):
                continue
            action = div.find('li', id='document-action-%s' % id)
            if action:
                anchor = action.find('a')
                if anchor and anchor.has_key('href'):
                    return anchor['href']
            # Only the first documentActions div is inspected.
            break
        return None

    def _getURLPath(self, brain, link):
        """ Retrieve the URL path on the filesystem.

        Translates ``link``, an absolute URL inside the portal, into a
        link relative to the deployed file of ``brain`` and gives it the
        same extension the target receives when it is deployed. A
        '#fragment' is preserved.
        """
        fragment = ''
        if '#' in link:
            link, fragment = link.split('#', 1)
            fragment = '#' + fragment
        target = link.rstrip('/')

        # A folderish object is deployed as <folder>/index.html, so its
        # links are relative to the folder itself; any other object is a
        # file inside its parent.
        if brain.is_folderish:
            base_url = brain.getURL()
        else:
            base_url = '/'.join(brain.getURL().split('/')[:-1])
        rel_path = self._getRelativePath(base_url, target)
        rel_id = target.split('/')[-1]

        if rel_id in self.extra_views:
            # mod link to have +.html
            rel_path += '.html'
        else:
            found = brain.portal_catalog.searchResults(
                path={'query': self._getCatalogPath(brain, target),
                      'depth': 0})
            # A target the catalog does not know keeps its bare path.
            if len(found) > 0:
                if found[0].is_folderish:
                    # mod link to have /index.html
                    rel_path = '%s/index.html' % rel_path
                elif found[0].Type == 'Page':
                    # mod link to have +.html
                    rel_path += self._htmlSuffix(rel_id)
        return rel_path + fragment

    def _getCatalogPath(self, brain, url):
        """ Convert an absolute URL inside the portal to the path used
        for a catalog path query """
        # NOTE(review): assumes the URL tool provides getPortalPath() and
        # that 'depth': 0 restricts a path query to the object itself.
        # Confirm against the CMFCore / ExtendedPathIndex versions in use.
        url_tool = brain.portal_url
        return url_tool.getPortalPath() + url[len(url_tool()):]

    def _deployCompressedResources(self, url, portal_url, dpath):
        """ extracts resources from soup and writes to static path location

        Downloads ``url`` and stores it below ``dpath``, with '%20' in the
        path replaced by '_'. Returns the path written.
        """
        path = self._getObjPath(url, portal_url, dpath).replace('%20', '_')
        self._writeFile(path, self._httpget(url))
        return path

    def _getDeploymentPath(self, sp):
        """ Get the default static path location.

        ``sp`` is joined onto the directory part of BobobaseName.
        """
        return os_join(os_split(BobobaseName)[0], sp)

    def _getObjPath(self, url, portal_url, dpath):
        """ Get the object path based on the deployment path.

        The portal URL prefix is removed from ``url`` and the remaining
        segments are appended to ``dpath``. A URL outside the portal is
        appended as a whole.
        """
        objpath = url
        if url == portal_url or url.startswith(portal_url + '/'):
            # Only the leading portal URL is removed, never a later
            # occurrence of the same text inside the URL.
            objpath = url[len(portal_url):]
        path = dpath
        for segment in objpath.split('/'):
            if segment:
                path = os_join(path, segment)
        return path

    def _createDirectory(self, path):
        """ Create a directory on the filesystem

        Does nothing when ``path`` already exists.
        """
        if not os_lexists(path):
            os_makedirs(path)

    def _httpget(self, url):
        """ Get html for the url

        Returns the response body. Errors raised by urlopen propagate to
        the caller.
        """
        response = urlopen(url)
        try:
            return response.read()
        finally:
            response.close()

    def _writeFile(self, fn, data):
        """ Write ``data`` to the file ``fn``, creating missing parent
        directories first """
        directory = os_split(fn)[0]
        if directory:
            self._createDirectory(directory)
        # Binary mode: images and other downloads pass through here too
        # and must reach the disk byte for byte.
        f = open(fn, 'wb')
        try:
            f.write(data)
        finally:
            f.close()

    def _getRelativePath(self, path1, path2):
        """ Return the relative URL path leading from the directory
        ``path1`` to ``path2``

        Both arguments are '/'-separated and expected to share a common
        root. The result is '.', or starts with './' or '../'.
        """
        source = [seg for seg in path1.split('/') if seg]
        target = [seg for seg in path2.split('/') if seg]
        # Compare whole segments; a character-wise common prefix would
        # treat 'foo' as part of 'foobar'.
        shared = 0
        for left, right in zip(source, target):
            if left != right:
                break
            shared += 1
        parts = ['..'] * (len(source) - shared) + target[shared:]
        if not parts:
            return '.'
        if parts[0] != '..':
            # An explicit './' keeps a first segment containing ':' from
            # being read as a URL scheme.
            parts.insert(0, '.')
        return '/'.join(parts)

    def _rewriteHREF(self, brain, soup, dpath, portal_url):
        """ Rewrite the anchors of ``soup`` that point into the portal as
        relative links to the deployed files

        ``soup`` is modified in place and returned.
        """
        inside_portal = portal_url + '/'
        for tag in soup.findAll('a'):
            if not tag.has_key('href'):
                continue
            href = tag['href']
            if href == portal_url or href.startswith(inside_portal):
                tag['href'] = self._getURLPath(brain, href)
        return soup

Note: See TracBrowser for help on using the repository browser.