source: enpraxis.staticsite/trunk/enpraxis/staticsite/utilities/staticsiteutility.py @ 291

Revision 291, 11.1 KB checked in by jon, 4 years ago (diff)

Committing changes to rewrite from linktable to url to filesystem path

Line 
1import os
2import re
3import string
4from BeautifulSoup import BeautifulSoup
5from Globals import BobobaseName
6from os import makedirs as os_makedirs
7from os.path import join as os_join
8from os.path import lexists as os_lexists
9from os.path import split as os_split
10
11from urllib2 import urlopen
12from urllib2 import HTTPError
13
14from OFS.SimpleItem import SimpleItem
15from zope.interface import implements
16from interfaces import IStaticSiteUtility
17from zope.component import getUtility, getMultiAdapter
18
19class StaticSiteUtility(SimpleItem):
20    """ Deploy a static site """
21
22    implements(IStaticSiteUtility)
23
24    def deploySite(self, context):
25        """ Deploy the site """
26
27        ssprops = context.portal_url.portal_properties.staticsite_properties
28        domain = ssprops.getProperty('domain')
29        dpath = self._getDeploymentPath(ssprops.getProperty('deployment_path'))
30        pstates = ssprops.getProperty('published_states')
31       
32        self._deploySiteStructure(context, ssprops, domain, dpath)
33        self._deploySiteActions(context, dpath)
34
35        brains = context.portal_catalog.searchResults(
36            path={'query':'/'.join(context.getPhysicalPath()),
37                  'depth':1,},
38            review_state=pstates)
39        for x in brains:
40            url = x.getURL()
41            self.deployObject(url, x, dpath, domain)
42            self.traverse(x, dpath, domain, pstates)
43
44    def traverse(self, brain, dpath, domain, pstates):
45        """ Traverse the site. """
46        brains = brain.portal_catalog.searchResults(
47            path={'query':brain.getPath(), 'depth':1},
48                  review_state=pstates)
49        for x in brains:
50            url = x.getURL()
51            self.deployObject(url, x, dpath, domain)
52            if x.is_folderish:
53                self.traverse(x, dpath, domain, pstates)
54           
55    def deployObject(self, url, brain, dpath, domain):
56        """ Deploy an object """
57        portal_url = brain.portal_url()
58        soup = ''
59        if brain.is_folderish:
60            #create dir before processing html         
61            objpath = self._getObjPath(url, portal_url, dpath)
62            self._createDirectory(objpath)
63            path = '%s/index.html' % objpath
64            raw = self._httpget(url)
65            soup = BeautifulSoup(raw)
66            #process document actions
67            self._deployDocumentActions(brain, portal_url, path, soup)
68            self.runFilters(brain, soup, portal_url, dpath)
69            self._writeFile(path, soup.prettify())
70        elif brain.Type in ['Page']:
71            path = self._getObjPath(url, portal_url, dpath)
72            if '.htm' not in path:
73                path += '.html'
74            raw = self._httpget(url)       
75            soup = BeautifulSoup(raw)
76            #process document actions
77            self._deployDocumentActions(brain, portal_url, path, soup)           
78            self.runFilters(brain, soup, portal_url, dpath)
79            self._writeFile(path, soup.prettify())
80        else:
81            #get file object
82            path = self._getObjPath(url, portal_url, dpath)
83            self.linkTable[url] = {'dpath':path, 'type' : 'file'} 
84            raw = self._httpget(url)
85            soup = BeautifulSoup(raw)
86            self.runFilters(brain, soup, portal_url, dpath)       
87            self._writeFile(path, raw)
88        #process rdf for the object
89        self._deployRDF(brain, url, path)
90
91    def _deployRDF(self, brain, url, path):
92        """ Deploy the RDF for an object """
93        rdfurl = '%s/rdf' %url
94        rdfpath = '%s-rdf' %path
95        if brain.Type in ['Page', 'Course', 'Division', 'Folder', 'File', 'Image']:
96            rdfraw = self._httpget(rdfurl)
97            self._writeFile(rdfpath, rdfraw)     
98
99    def runFilters(self, brain, soup, portal_url, dpath):
100        """ Filter content """
101        rel_path = './'
102        #overwrite file
103        soup.base.extract()
104        self._rewriteHREF(brain, soup, dpath)
105
106    def _deploySiteStructure(self, context, ssprops, domain, dpath):
107        """ Get the base framework and resources needed to build the chrome locally  """
108        base_files = ssprops.base_files
109        css_images = ssprops.css_images
110        extra_views = ssprops.extra_views
111       
112        portal_url = context.portal_url()
113       
114        for x in base_files:
115            #Download files/images that are used sitewide in the chrome
116            url = '%s/%s' % (portal_url, x)
117            path = self._getObjPath(url, portal_url, dpath)
118            raw = self._httpget(url)
119            self._writeFile(path, raw)
120           
121        for x in extra_views:
122            #Download zope3 views and/or additional views that are not included in the site catalog search
123            url = '%s/%s' % (portal_url, x)
124            path = self._getObjPath(url, portal_url, dpath)
125            path += '.html'
126            raw = self._httpget(url)
127            self._writeFile(path, raw)
128
129        #get soup of front page
130        fp = self._httpget(portal_url)
131        soup = BeautifulSoup(fp)
132        types = ['css', 'js']
133           
134        for x in types:
135            if x == 'css':
136                for tag in soup.head.findAll('style'):
137                    #analyze content and extract url to CSS
138                    contents = tag.contents[0]
139                    url = re.match(r"""[^(]*\((.*)\)[^)]*$""", contents, re.X)
140                    url = url.group(1)
141                    path = self._deployCompressedResources(url, portal_url, dpath)
142                    css_path = os_split(path)[0]
143                for img in css_images:
144                    #download css_images
145                    url = '%s/%s' % (portal_url, img)
146                    path = '%s/%s' % (css_path, img)
147                    try:
148                        raw = self._httpget(url)
149                    except HTTPError:
150                        continue
151                    self._writeFile(path, raw)                   
152                   
153            elif x == 'js':
154                for tag in soup.head.findAll('script'):
155                    if tag.has_key('src'):
156                        url = tag['src']
157                        self._deployCompressedResources(url, portal_url, dpath)
158
159
160    def _deploySiteActions(self, context, dpath):
161        """  deploys portal_actions site_actions content """
162        excluded = ['plone_setup', 'deploy']       
163        site_actions = context.portal_actions.site_actions.listActions()
164        for x in site_actions:
165            if x.id not in excluded and x.visible == True:
166                portal_url = context.portal_url()
167                action_url = x.url_expr.split('/')[-1]
168                url = '%s/%s' % (portal_url, action_url)
169                path = self._getObjPath(url, portal_url, dpath)
170                path += '.html'
171                raw = self._httpget(url)       
172                self._writeFile(path, raw)
173       
174    def _deployDocumentActions(self, brain, portal_url, path, soup):
175        """ Deploys the document actions for the site """
176        doc_actions = brain.portal_actions.document_actions.listActions()
177        for doc_action in doc_actions:
178            if doc_action.id in ['skinless', 's5']:
179                action_url = self._findDocActionUrl(soup, doc_action.id)
180                if action_url:
181                    path = '%s-%s.html' %(path, doc_action.id)
182                    if action_url.find(portal_url) == -1:
183                        action_url = '%s/%s' %(portal_url, action_url)
184                    path = '%s-%s.html' %(path, doc_action.id)
185                    raw = self._httpget(action_url)
186                    self._writeFile(path, raw)
187            elif doc_action.id in ['rss', 'rss_front']:
188                action_url = self._findDocActionUrl(soup, doc_action.id)
189                if action_url:
190                    path = '%s-%s' %(path, doc_action.id)
191                    if action_url.find(portal_url) == -1:
192                        action_url = '%s/%s' %(portal_url, action_url)
193                    path = '%s-%s' %(path, doc_action.id)
194                    raw = self._httpget(action_url)
195                    self._writeFile(path, raw)
196                   
197
198    def _findDocActionUrl(self, soup, id):
199        """ Parse document soup to find a document action """
200        docdivs = soup.body.findAll('div')
201        action_url = None
202        for x in docdivs:
203            if x.has_key('class') and x['class'] == 'documentActions':
204                action = x.find('li', id='document-action-%s' %id)
205                if action:
206                    anchor = action.find('a')
207                    if anchor and anchor.has_key('href'):
208                        action_url = anchor['href']
209                break
210        return action_url
211
212    def _getURLPath(self, brain, url):
213        """ Retrieve the URL path on the filesystem. """
214
215        rel_path = self._getRelativePath('/'.join(brain.getURL().split('/')[:-1]), url)
216        rel_path = rel_path.split('#')[0]
217        urlbrains = brain.portal_catalog.searchResults(query={'path':rel_path,},id=rel_path.split('/')[0])
218        for x in urlbrains:
219            if brains.is_folderish:
220                pass
221            elif brain.Type == 'Page':
222                pass
223            else:
224                pass
225
226    def _deployCompressedResources(self, url, portal_url, dpath):
227        """ extracts resources from soup and writes to static path location """
228        path = self._getObjPath(url, portal_url, dpath)
229        path = path.replace('%20','_')
230        raw = self._httpget(url)
231        self._writeFile(path, raw)
232        return path
233                   
234    def _getDeploymentPath(self, sp):
235        """ Get the default static path location. """
236        dpath = os_split(BobobaseName)[0]
237        return os_join(dpath, sp)
238       
239    def _getObjPath(self, url, portal_url, dpath):
240        """ Get the object path based on the deployment path. """
241        objpath = url.replace(portal_url + '/', '')
242        path = dpath
243        for x in objpath.split('/'):
244            path = os_join(path, x)
245        return path
246   
247    def _createDirectory(self, path):
248        """ Create a directory on the filesystem """     
249        if not os_lexists(path):
250            os_makedirs(path)
251           
252    def _httpget(self, url):
253        """ Get html for the url """
254        f = urlopen(url)
255        data = f.read()
256        f.close()
257        return data
258
259    def _writeFile(self, fn, data):
260        self._createDirectory(os_split(fn)[0])
261        f = open(fn, 'w')
262        f.write(data)
263        f.close()
264   
265    def _getRelativePath(self, path1, path2):
266        common = os.path.commonprefix ([path1, path2])
267        p1 = path1[len(common):]
268        p2 = path2[len(common):]
269        if os.path.isfile (path1):
270            p1 = os.path.dirname (p1)
271        dirs = string.split (p1, os.sep)
272        dotdot = map (lambda x: '..', dirs)
273        dotpath = string.join (dotdot, os.sep)
274        rel_path = os.path.join (dotpath, p2)       
275        if rel_path.startswith(os.sep):
276            rel_path = '.%s' % rel_path
277        return rel_path
278           
279    def _rewriteHREF(self, brain, soup, dpath):
280        tags = soup.findAll('a')
281        for tag in tags:
282            if tag.has_key('href'):
283                self._getURLPath(brain, tag['href'])
284        return soup
285       
286
Note: See TracBrowser for help on using the repository browser.