Coverage for aiocoap/util/vendored/link_header.py: 99%

92 statements  

« prev     ^ index     » next       coverage.py v7.6.3, created at 2024-10-15 22:10 +0000

1# SPDX-FileCopyrightText: Michael Burrows <mjb@asplake.co.uk> 

2# 

3# SPDX-License-Identifier: BSD-3-Clause 

4 

5""" 

6Parse and format link headers according to RFC 5988 "Web Linking". 

7 

8Usage (assuming a suitable headers object in the environment): 

9 

10>>> headers['Link'] = str(LinkHeader([Link("http://example.com/foo", rel="self"), 

11... Link("http://example.com", rel="up")])) 

12>>> headers['Link'] 

13'<http://example.com/foo>; rel=self, <http://example.com>; rel=up' 

14>>> parse(headers['Link']) 

15LinkHeader([Link('http://example.com/foo', rel='self'), Link('http://example.com', rel='up')]) 

16 

17Blank and missing values roundtrip correctly: 

18 

19>>> format_link(parse('</s/1>; obs; if="core.s"; foo=""')) 

20'<</s/1>; obs; if=core.s; foo="">' 

21 

22Conversions to and from json-friendly list-based structures are also provided: 

23 

24>>> parse(headers['Link']).to_py() 

25[['http://example.com/foo', [['rel', 'self']]], ['http://example.com', [['rel', 'up']]]] 

26>>> str(LinkHeader([['http://example.com/foo', [['rel', 'self']]], 

27... ['http://example.com', [['rel', 'up']]]])) 

28'<http://example.com/foo>; rel=self, <http://example.com>; rel=up' 

29 

30For further information see parse(), LinkHeader and Link. 

31""" 

32 

33import re 

34from typing import Dict 

35from urllib.parse import urljoin 

36 

# Names exported by ``from link_header import *``.
__all__ = ["parse", "format_links", "format_link", "LinkHeader", "Link", "ParseException"]

# Attribute names for which ``Link.__getattr__`` returns a single value
# (or raises AttributeError when absent) instead of a list of values.
SINGLE_VALUED_ATTRS = [
    "rel",
    "anchor",
    "rev",
    "media",
    "title",
    "title*",
    "type",
]

47 

#
# Regexes for link header parsing.  TOKEN and QUOTED in particular should conform to RFC2616.
#
# Acknowledgement: The QUOTED regexp is based on
# http://stackoverflow.com/questions/249791/regexp-for-quoted-string-with-escaping-quotes/249937#249937
#
# Trailing spaces are consumed by each pattern.  The RE_COMMA_HREF pattern also
# allows for any leading spaces.
#

# Double-quoted string with backslash-escaped characters; group = the text
# between the quotes (escapes are left in place for the caller to resolve).
QUOTED = r'"((?:[^"\\]|\\.)*)"'

# Non-empty sequence of non-separator characters; group = the token itself.
TOKEN = r"([^()<>@,;:\"\[\]?={}\s]+)"

# Optional ',' separator followed by '<href>'; no attempt to check URI validity.
# The href is captured lazily so that the ' *' in front of '>' really strips
# trailing blanks (a greedy '[^>]*' would swallow them into the href).
RE_COMMA_HREF = re.compile(r" *,? *< *([^>]*?) *> *")

# A string that consists of exactly one token (i.e. needs no quoting).
RE_ONLY_TOKEN = re.compile(rf"^{TOKEN}$")

# 'name', 'name=token' or 'name="quoted"'.
# Groups: 1 = name, 2 = raw value, 3 = token value, 4 = inside of quoted value.
RE_ATTR = re.compile(rf"{TOKEN} *(?:= *({TOKEN}|{QUOTED}))? *")

RE_SEMI = re.compile(r"; *")
RE_COMMA = re.compile(r", *")

68 

69 

def parse(header):
    """Parse a link header string, returning a LinkHeader object:

    >>> parse('<http://example.com/foo>; rel="foo bar", <http://example.com>; rel=up; type=text/html')
    LinkHeader([Link('http://example.com/foo', rel='foo bar'), Link('http://example.com', rel='up', type='text/html')])

    ParseException is raised in the event that the input string is not parsed completely:

    >>> parse('<http://example.com/foo> error') #doctest: +SKIP
    Traceback (most recent call last):
        ...
    ParseException: link_header.parse() failed near 'error'

    :param header: the link header text to parse
    :returns: a LinkHeader holding one Link per ``<href>`` found
    :raises ParseException: if some trailing part of the input could not be
        consumed; the message contains the repr of the unparsed remainder
    """
    scanner = _Scanner(header)
    links = []
    while scanner.scan(RE_COMMA_HREF):
        href = scanner[1]
        attrs = []
        while scanner.scan(RE_SEMI):
            if not scanner.scan(RE_ATTR):
                # A ';' that is not followed by an attribute is tolerated;
                # whatever follows is picked up by the enclosing loops or
                # reported through the leftover check below.
                continue
            # RE_ATTR groups: 1 = name, 3 = token value, 4 = quoted value
            attr_name, token, quoted = scanner[1], scanner[3], scanner[4]
            if quoted is not None:
                # Only escaped double quotes are resolved here.
                value = quoted.replace(r"\"", '"')
            else:
                # Either the bare token, or None for a value-less attribute.
                value = token
            attrs.append([attr_name, value])
        links.append(Link(href, attrs))

    if scanner.buf:
        # Interpolate the remainder into the message; passing it as a second
        # exception argument left a literal '%s' in the rendered error.
        raise ParseException("link_header.parse() failed near %r" % (scanner.buf,))

    return LinkHeader(links)

103 

104 

def format_links(*args, **kwargs):
    """Build a LinkHeader from the given arguments and return its string form."""
    link_header = LinkHeader(*args, **kwargs)
    return str(link_header)

107 

108 

def format_link(*args, **kwargs):
    """Build a Link from the given arguments and return its string form."""
    link = Link(*args, **kwargs)
    return str(link)

111 

112 

class ParseException(Exception):
    """Raised by parse() when the input could not be consumed completely."""

115 

116 

class LinkHeader:
    """An ordered collection of links that is rendered as one link header."""

    def __init__(self, links=None):
        """Create a LinkHeader from Link objects, or from argument lists out
        of which Link objects are constructed:

        >>> LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')])
        LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')])
        >>> LinkHeader([['http://example.com/foo', [['rel', 'foo']]], ['http://example.com', [['rel', 'up']]]])
        LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')])

        The resulting Link objects are available through the `links` attribute.

        Converting to a string produces the header text:

        >>> str(LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]))
        '<http://example.com/foo>; rel=foo, <http://example.com>; rel=up'

        A json-friendly nested-list form is available as well:

        >>> LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]).to_py()
        [['http://example.com/foo', [['rel', 'foo']]], ['http://example.com', [['rel', 'up']]]]

        """
        collected = []
        for entry in links or []:
            if not isinstance(entry, Link):
                # Anything that is not a Link is taken as Link's argument list.
                entry = Link(*entry)
            collected.append(entry)
        self.links = collected

    def to_py(self):
        """Return the links as nested lists:

        >>> LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]).to_py()
        [['http://example.com/foo', [['rel', 'foo']]], ['http://example.com', [['rel', 'up']]]]
        """
        as_lists = []
        for link in self.links:
            as_lists.append(link.to_py())
        return as_lists

    def __repr__(self):
        inner = ", ".join(map(repr, self.links))
        return "LinkHeader([%s])" % inner

    def __str__(self):
        """Render all links, comma separated:

        >>> str(LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]))
        '<http://example.com/foo>; rel=foo, <http://example.com>; rel=up'
        """
        return ", ".join(map(str, self.links))

    def links_by_attr_pairs(self, pairs):
        """Return the links whose attribute pairs include every one of the given pairs:

        >>> parse('<http://example.com/foo>; rel="foo", <http://example.com>; rel="up"'
        ... ).links_by_attr_pairs([('rel', 'up')])
        [Link('http://example.com', rel='up')]
        """
        matching = []
        for link in self.links:
            for key, value in pairs:
                if [key, value] not in link.attr_pairs:
                    # One required pair is missing: this link is out.
                    break
            else:
                matching.append(link)
        return matching

178 

179 

class Link(object):
    """Represents a single link."""

    def __init__(self, href, attr_pairs=None, **kwargs):
        """Initializes a Link object with an href and attributes either in
        the form of a sequence of key/value pairs &/or as keyword arguments.
        The sequence form allows attributes to be repeated. Attributes may be
        accessed subsequently via the `attr_pairs` property.

        String conversion follows the spec:

        >>> str(Link('http://example.com', [('foo', 'bar'), ('foo', 'baz')], rel='self'))
        '<http://example.com>; foo=bar; foo=baz; rel=self'

        Conversion to json-friendly list-based structures:

        >>> Link('http://example.com', [('foo', 'bar'), ('foo', 'baz')], rel='self').to_py()
        ['http://example.com', [['foo', 'bar'], ['foo', 'baz'], ['rel', 'self']]]

        Any iterable of pairs is accepted, not only lists:

        >>> Link('http://example.com', (('foo', 'bar'),)).to_py()
        ['http://example.com', [['foo', 'bar']]]
        """
        self.href = href
        # list(...) around attr_pairs lets tuples and other iterables be
        # combined with the keyword arguments; each pair is stored as a list.
        self.attr_pairs = [
            list(pair) for pair in list(attr_pairs or []) + list(kwargs.items())
        ]

    def to_py(self):
        """Convert to a json-friendly list-based structure:

        >>> Link('http://example.com', rel='foo').to_py()
        ['http://example.com', [['rel', 'foo']]]
        """
        return [self.href, self.attr_pairs]

    def __repr__(self):
        """
        >>> Link('http://example.com', rel='self')
        Link('http://example.com', rel='self')
        """
        return "Link(%s)" % ", ".join(
            [repr(self.href)]
            + ["%s=%s" % (pair[0], repr(pair[1])) for pair in self.attr_pairs]
        )

    def __str__(self):
        """Formats a single link:

        >>> str(Link('http://example.com/foo', [['rel', 'self']]))
        '<http://example.com/foo>; rel=self'
        >>> str(Link('http://example.com/foo', [['rel', '"quoted"'], ['type', 'text/html'], ['title*', "UTF-8'en'%e2%82%ac%20rates"]]))
        '<http://example.com/foo>; rel="\\\\"quoted\\\\""; type=text/html; title*=UTF-8\\'en\\'%e2%82%ac%20rates'

        Note that there is no explicit support for the title* attribute other
        than to output it unquoted. Where used, it is up to client applications to
        provide values that meet RFC2231 Section 7.
        """

        def str_pair(key, value):
            if value is None:
                # Value-less attribute: only the name is emitted.
                return key
            elif RE_ONLY_TOKEN.match(value) or key.endswith("*"):
                # Plain tokens and all "*" attributes are emitted unquoted.
                return "%s=%s" % (key, value)
            else:
                return '%s="%s"' % (key, value.replace('"', r"\""))

        return "; ".join(
            ["<%s>" % self.href]
            + [str_pair(key, value) for key, value in self.attr_pairs]
        )

    def __getattr__(self, name):
        """Look up link attributes by (case-insensitive) name.  Names listed
        in SINGLE_VALUED_ATTRS give the first value or raise AttributeError;
        all other names give the (possibly empty) list of values.

        >>> Link('/', rel='self').rel
        'self'
        >>> Link('/', hreflang='EN').hreflang
        ['EN']
        >>> Link('/', foo='bar').foo
        ['bar']
        >>> Link('/', [('foo', 'bar'), ('foo', 'baz')]).foo
        ['bar', 'baz']
        >>> Link('/').rel #doctest: +ELLIPSIS
        Traceback (most recent call last):
            ...
        AttributeError: No attribute of type 'rel' present
        >>> Link('/').hreflang
        []
        >>> Link('/').foo
        []

        Special (dunder) names are never treated as link attributes, so that
        protocol probes such as the ones made by the copy module work:

        >>> import copy
        >>> copy.copy(Link('/', rel='self'))
        Link('/', rel='self')
        """
        # __getattr__ only runs when regular lookup failed.  Reaching it for
        # 'attr_pairs' means the instance is not initialized yet (as during
        # copying or unpickling); reading self.attr_pairs below would then
        # recurse without end.  Dunder probes (__deepcopy__, __setstate__, ...)
        # must fail with AttributeError instead of yielding a list.
        if name == "attr_pairs" or (name.startswith("__") and name.endswith("__")):
            raise AttributeError(
                "%r object has no attribute %r" % (type(self).__name__, name)
            )
        name_lower = name.lower()
        values = [value for key, value in self.attr_pairs if key.lower() == name_lower]
        if name in SINGLE_VALUED_ATTRS:
            if values:
                return values[0]
            else:
                raise AttributeError("No attribute of type %r present" % name_lower)
        return values

    def __contains__(self, name):
        """
        >>> 'rel' in Link('/', rel='self')
        True
        >>> 'obs' in Link('/', obs=None)
        True
        >>> 'rel' in Link('/')
        False
        """
        name_lower = name.lower()
        return any(key.lower() == name_lower for key, value in self.attr_pairs)

    def get_context(self, requested_resource_address):
        """Return the absolute URI of the context of a link. This usually
        equals the base address the statement is about (eg. the requested URL
        if the link header was served in a successful HTTP GET request), but
        can be overridden by the anchor parameter.

        >>> Link('../', rel='index').get_context('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/chapter1/'
        >>> Link('', rel='next', anchor='../').get_context('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/'
        """
        if "anchor" in self:
            return urljoin(requested_resource_address, self.anchor)
        return requested_resource_address

    def get_target(self, requested_resource_address):
        """Return the absolute URI of the target of a link. It is determined by
        joining the address from which the link header was retrieved with the
        link-value (inside angular brackets) according to RFC3986 section 5.

        >>> Link('../', rel='index').get_target('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/'
        >>> Link('', rel='next', anchor='../').get_target('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/chapter1/'
        """
        return urljoin(requested_resource_address, self.href)

314 

315 

316class _Scanner(object): 

317 def __init__(self, buf): 

318 self.buf = buf 

319 self.match = None 

320 

321 def __getitem__(self, key): 

322 return self.match.group(key) 

323 

324 def scan(self, pattern): 

325 self.match = pattern.match(self.buf) 

326 if self.match: 

327 self.buf = self.buf[self.match.end() :] 

328 return self.match 

329 

330 

# Stand-in "headers object" used by the doctests in the module docstring.
headers: Dict[str, str] = dict()