Package tdi :: Package markup :: Package soup :: Module filters
[frames] | no frames]

Source Code for Module tdi.markup.soup.filters

  1  # -*- coding: ascii -*- 
  2  u""" 
  3  :Copyright: 
  4   
  5   Copyright 2006 - 2013 
  6   Andr\xe9 Malo or his licensors, as applicable 
  7   
  8  :License: 
  9   
 10   Licensed under the Apache License, Version 2.0 (the "License"); 
 11   you may not use this file except in compliance with the License. 
 12   You may obtain a copy of the License at 
 13   
 14       http://www.apache.org/licenses/LICENSE-2.0 
 15   
 16   Unless required by applicable law or agreed to in writing, software 
 17   distributed under the License is distributed on an "AS IS" BASIS, 
 18   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 19   See the License for the specific language governing permissions and 
 20   limitations under the License. 
 21   
 22  ===================== 
 23   Soup Filter Classes 
 24  ===================== 
 25   
 26  Filters for soup templates. 
 27  """ 
 28  __author__ = u"Andr\xe9 Malo" 
 29  __docformat__ = "restructuredtext en" 
 30   
 31  import re as _re 
 32   
 33  from tdi import util as _util 
 34  from tdi import filters as _filters 
 35   
 36   
class EncodingDetectFilter(_filters.BaseEventFilter):
    """
    Detect the template encoding and announce it to the builder

    The encoding is looked up in HTML ``meta`` elements (start tag events)
    and in the XML declaration (processing instruction events). Whenever
    one is found, it is reported via ``builder.handle_encoding``. The
    triggering event itself is always forwarded to the builder afterwards.
    """
    __slots__ = ('_normalize', '_meta')

    def __init__(self, builder):
        """
        Initialization

        :Parameters:
          `builder` : ``object``
            The builder to pass the events to. It is handed over to the
            base class; this filter uses its ``decoder.normalize``
            callable for name normalization.
        """
        super(EncodingDetectFilter, self).__init__(builder)
        normalize = self.builder.decoder.normalize
        self._normalize = normalize
        # Normalize the element name once, it's compared on every start tag
        self._meta = normalize('meta')

    @staticmethod
    def _unquote(value):
        """
        Remove the surrounding quotes of a value, if there are any

        If `value` starts with a single or double quote, its first and its
        last character are dropped and the remainder is whitespace-stripped.
        Otherwise `value` is returned untouched.

        :Parameters:
          `value` : ``basestring``
            The value to unquote

        :Return: The unquoted value
        :Rtype: ``basestring``
        """
        if value.startswith('"') or value.startswith("'"):
            return value[1:-1].strip()
        return value

    def _meta_encoding(self, attr):
        """
        Find the encoding specified by the attributes of a meta element

        A non-empty ``charset`` attribute takes precedence. Otherwise the
        ``content`` attribute is evaluated, provided that ``http-equiv``
        is ``content-type``.

        :Parameters:
          `attr` : iterable
            The attributes as ``(name, value)`` pairs

        :Return: The encoding found or ``None`` if no encoding is
                 specified. An encoding picked from the content type may
                 be an empty string.
        :Rtype: ``basestring``
        """
        normalize = self._normalize
        attrs = dict([(normalize(key), value) for key, value in attr])

        # HTML5 style: <meta charset="...">
        charset = self._unquote(str(attrs.get(normalize('charset')) or ''))
        if charset:
            return charset

        # Old style: <meta http-equiv="Content-Type" content="...">
        equiv = self._unquote(
            (attrs.get(normalize('http-equiv')) or '').lower()
        )
        if equiv != 'content-type':
            return None

        ctype = attrs.get(normalize('content'))
        if not ctype:
            return None

        parsed = _util.parse_content_type(self._unquote(ctype))
        if parsed is None:
            return None

        # The second item maps parameter names to sequences of values;
        # the first charset value is the one being used.
        charsets = parsed[1].get('charset')
        if not charsets:
            return None
        return charsets[0].strip()

    def handle_starttag(self, name, attr, closed, data):
        """
        Extract encoding from HTML meta element

        Here are samples for the expected formats::

          <meta charset="utf-8"> <!-- HTML5 -->

          <meta http-equiv="Content-Type" content="text/html; charset=utf-8">

        The event is passed to the builder nevertheless.

        :See: `BuildingListenerInterface`
        """
        if self._normalize(name) == self._meta:
            encoding = self._meta_encoding(attr)
            # ``None`` means "nothing found". An empty encoding taken from
            # the content type is still passed on.
            if encoding is not None:
                self.builder.handle_encoding(encoding)

        self.builder.handle_starttag(name, attr, closed, data)

    #: Regex matcher to match xml declarations
    #:
    #: :Type: ``callable``
    _PI_MATCH = _re.compile(r'''
        <\? \s* [xX][mM][lL] \s+ (?P<attr>
            [^"'?]*
            (?:
                (?:
                    "[^"]*"
                  | '[^']*'
                )
                [^"'?]*
            )*
        )
        \s* \?>$
    ''', _re.X).match

    #: Iterator over the matched xml declaration attributes
    #:
    #: :Type: ``callable``
    _PI_ATT_ITER = _re.compile(r'''
        \s*
        (?P<name>[^\s=]*)           # attribute name
        \s*
        =
        (?P<value>                  # value
            \s*"[^"]*"
          | \s*'[^']*'
        )
    ''', _re.X).finditer

    def handle_pi(self, data):
        """
        Extract encoding from xml declaration

        Here's a sample for the expected format::

          <?xml version="1.0" encoding="ascii" ?>

        If the processing instruction is an xml declaration, an encoding is
        always reported to the builder: either the one specified or
        ``utf-8`` as the default.

        The event is passed to the builder nevertheless.

        :See: `BuildingListenerInterface`
        """
        declaration = self._PI_MATCH(str(data))
        if declaration is not None:
            encoding = 'utf-8'  # xml default
            for item in self._PI_ATT_ITER(declaration.group('attr')):
                key, value = item.group('name', 'value')
                if not (key or value):
                    break
                if key == 'encoding':
                    value = self._unquote(value.strip())
                    if value:
                        encoding = value
                    # The first encoding attribute ends the scan, even if
                    # its value is empty (the default is kept then).
                    break
            self.builder.handle_encoding(encoding)

        self.builder.handle_pi(data)

# Try to swap in the alternative implementation: if ``load('impl')``
# provides a module, its ``SoupEncodingDetectFilter`` replaces the Python
# class defined above under the same public name.
# NOTE(review): presumably this is the C accelerated version -- confirm
# against ``tdi.c``.
from tdi import c
c = c.load('impl')
if c is not None:
    EncodingDetectFilter = c.SoupEncodingDetectFilter
# Don't leave the loader helper in the module namespace
del c
