Coverage for aiocoap/util/vendored/link_header.py: 99%
92 statements
« prev ^ index » next coverage.py v7.6.8, created at 2024-11-28 12:34 +0000
« prev ^ index » next coverage.py v7.6.8, created at 2024-11-28 12:34 +0000
1# SPDX-FileCopyrightText: Michael Burrows <mjb@asplake.co.uk>
2#
3# SPDX-License-Identifier: BSD-3-Clause
5"""
6Parse and format link headers according to RFC 5988 "Web Linking".
8Usage (assuming a suitable headers object in the environment):
10>>> headers['Link'] = str(LinkHeader([Link("http://example.com/foo", rel="self"),
11... Link("http://example.com", rel="up")]))
12>>> headers['Link']
13'<http://example.com/foo>; rel=self, <http://example.com>; rel=up'
14>>> parse(headers['Link'])
15LinkHeader([Link('http://example.com/foo', rel='self'), Link('http://example.com', rel='up')])
17Blank and missing values roundtrip correctly:
19>>> format_link(parse('</s/1>; obs; if="core.s"; foo=""'))
20'<</s/1>; obs; if=core.s; foo="">'
22Conversions to and from json-friendly list-based structures are also provided:
24>>> parse(headers['Link']).to_py()
25[['http://example.com/foo', [['rel', 'self']]], ['http://example.com', [['rel', 'up']]]]
26>>> str(LinkHeader([['http://example.com/foo', [['rel', 'self']]],
27... ['http://example.com', [['rel', 'up']]]]))
28'<http://example.com/foo>; rel=self, <http://example.com>; rel=up'
30For further information see parse(), LinkHeader and Link.
31"""
33import re
34from typing import Dict
35from urllib.parse import urljoin
# Names exported by a star-import of this module; helpers such as the
# regex constants and _Scanner are deliberately left out.
__all__ = [
    "parse",
    "format_links",
    "format_link",
    "LinkHeader",
    "Link",
    "ParseException",
]
# Attribute names for which Link.__getattr__ returns a single value (the first
# occurrence) instead of a list of all values.
SINGLE_VALUED_ATTRS = ["rel", "anchor", "rev", "media", "title", "title*", "type"]

#
# Regexes for link header parsing.  TOKEN and QUOTED in particular should conform to RFC2616.
#
# Acknowledgement: The QUOTED regexp is based on
# http://stackoverflow.com/questions/249791/regexp-for-quoted-string-with-escaping-quotes/249937#249937
#
# Trailing spaces are consumed by the scanning patterns (RE_COMMA_HREF, RE_ATTR,
# RE_SEMI, RE_COMMA).  The RE_COMMA_HREF pattern also allows for any leading spaces.
#

QUOTED = (
    r'"((?:[^"\\]|\\.)*)"'  # double-quoted strings with backslash-escaped double quotes
)
TOKEN = r"([^()<>@,;:\"\[\]?={}\s]+)"  # non-empty sequence of non-separator characters
RE_COMMA_HREF = re.compile(
    r" *,? *< *([^>]*) *> *"
)  # includes ',' separator; no attempt to check URI validity
# Anchored with \Z rather than $: "$" also matches just before a trailing
# newline, which would let a value such as "foo\n" pass as a bare token and be
# written out unquoted (and then fail to parse again).
RE_ONLY_TOKEN = re.compile(rf"^{TOKEN}\Z")
# Groups: 1 = attribute name, 2 = whole value (if any), 3 = value as bare
# token, 4 = contents of a quoted value (escapes still in place).
RE_ATTR = re.compile(rf"{TOKEN} *(?:= *({TOKEN}|{QUOTED}))? *")
RE_SEMI = re.compile(r"; *")
RE_COMMA = re.compile(r", *")
def parse(header):
    """Parse a link header string, returning a LinkHeader object:

    >>> parse('<http://example.com/foo>; rel="foo bar", <http://example.com>; rel=up; type=text/html')
    LinkHeader([Link('http://example.com/foo', rel='foo bar'), Link('http://example.com', rel='up', type='text/html')])

    ParseException is raised in the event that the input string is not parsed completely:

    >>> parse('<http://example.com/foo> error') #doctest: +SKIP
    Traceback (most recent call last):
        ...
    ParseException: link_header.parse() failed near 'error'
    """
    scanner = _Scanner(header)
    links = []
    # Each iteration consumes one "<href>" (with its optional leading comma)
    # followed by any number of ";"-separated attributes.
    while scanner.scan(RE_COMMA_HREF):
        href = scanner[1]
        attrs = []
        while scanner.scan(RE_SEMI):
            if scanner.scan(RE_ATTR):
                # Group 1 is the attribute name; groups 3 and 4 are the
                # bare-token and quoted-string forms of the optional value.
                attr_name, token, quoted = scanner[1], scanner[3], scanner[4]
                if quoted is not None:
                    # Undo the escaping of double quotes inside quoted values.
                    attrs.append([attr_name, quoted.replace(r"\"", '"')])
                elif token is not None:
                    attrs.append([attr_name, token])
                else:
                    # Attribute present without "=value".
                    attrs.append([attr_name, None])
        links.append(Link(href, attrs))

    # Anything left over could not be matched as a link: report it.
    if scanner.buf:
        raise ParseException(
            "link_header.parse() failed near %r" % (scanner.buf,)
        )

    return LinkHeader(links)
def format_links(*args, **kwargs):
    """Build a LinkHeader from the given arguments and return its string form."""
    link_header = LinkHeader(*args, **kwargs)
    return str(link_header)
def format_link(*args, **kwargs):
    """Build a Link from the given arguments and return its string form."""
    link = Link(*args, **kwargs)
    return str(link)
class ParseException(Exception):
    """Raised by parse() when part of the input is left unparsed."""
class LinkHeader(object):
    """Represents a sequence of links that can be formatted together as a link header."""

    def __init__(self, links=None):
        """Initializes a LinkHeader object with a list of Link objects or with
        list of parameters from which Link objects can be created:

        >>> LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')])
        LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')])
        >>> LinkHeader([['http://example.com/foo', [['rel', 'foo']]], ['http://example.com', [['rel', 'up']]]])
        LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')])

        The Link objects can be accessed afterwards via the `links` property.

        String conversion follows the spec:

        >>> str(LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]))
        '<http://example.com/foo>; rel=foo, <http://example.com>; rel=up'

        Conversion to json-friendly list-based structures:

        >>> LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]).to_py()
        [['http://example.com/foo', [['rel', 'foo']]], ['http://example.com', [['rel', 'up']]]]

        """

        # Entries that are not already Link objects are treated as argument
        # sequences for the Link constructor.
        self.links = [
            link if isinstance(link, Link) else Link(*link) for link in links or []
        ]

    def to_py(self):
        """Supports list conversion:

        >>> LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]).to_py()
        [['http://example.com/foo', [['rel', 'foo']]], ['http://example.com', [['rel', 'up']]]]
        """
        return [link.to_py() for link in self.links]

    def __repr__(self):
        return "LinkHeader([%s])" % ", ".join(repr(link) for link in self.links)

    def __str__(self):
        """Formats a link header:

        >>> str(LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]))
        '<http://example.com/foo>; rel=foo, <http://example.com>; rel=up'
        """
        return ", ".join(str(link) for link in self.links)

    def links_by_attr_pairs(self, pairs):
        """Lists links that have attribute pairs matching all the supplied pairs:

        >>> parse('<http://example.com/foo>; rel="foo", <http://example.com>; rel="up"'
        ...      ).links_by_attr_pairs([('rel', 'up')])
        [Link('http://example.com', rel='up')]

        `pairs` may be any iterable of (key, value) pairs, including a
        one-shot iterator.
        """
        # Materialise the pairs once, in the [key, value] list form used by
        # Link.attr_pairs.  Re-iterating `pairs` for every link would exhaust
        # a one-shot iterator on the first link and let all later links match
        # vacuously.
        wanted = [[key, value] for key, value in pairs]
        return [
            link
            for link in self.links
            if all(pair in link.attr_pairs for pair in wanted)
        ]
class Link(object):
    """Represents a single link."""

    def __init__(self, href, attr_pairs=None, **kwargs):
        """Initializes a Link object with an href and attributes either in
        the form of a sequence of key/value pairs &/or as keyword arguments.
        The sequence form allows attributes to be repeated. Attributes may be
        accessed subsequently via the `attr_pairs` property.

        String conversion follows the spec:

        >>> str(Link('http://example.com', [('foo', 'bar'), ('foo', 'baz')], rel='self'))
        '<http://example.com>; foo=bar; foo=baz; rel=self'

        Conversion to json-friendly list-based structures:

        >>> Link('http://example.com', [('foo', 'bar'), ('foo', 'baz')], rel='self').to_py()
        ['http://example.com', [['foo', 'bar'], ['foo', 'baz'], ['rel', 'self']]]
        """
        self.href = href
        # Unpack rather than concatenate so that attr_pairs may be any
        # iterable (list, tuple, generator), not only a list.
        self.attr_pairs = [
            list(pair) for pair in [*(attr_pairs or ()), *kwargs.items()]
        ]

    def to_py(self):
        """Convert to a json-friendly list-based structure:

        >>> Link('http://example.com', rel='foo').to_py()
        ['http://example.com', [['rel', 'foo']]]
        """
        return [self.href, self.attr_pairs]

    def __repr__(self):
        """
        >>> Link('http://example.com', rel='self')
        Link('http://example.com', rel='self')
        """
        return "Link(%s)" % ", ".join(
            [repr(self.href)]
            + ["%s=%s" % (pair[0], repr(pair[1])) for pair in self.attr_pairs]
        )

    def __str__(self):
        """Formats a single link:

        >>> str(Link('http://example.com/foo', [['rel', 'self']]))
        '<http://example.com/foo>; rel=self'
        >>> str(Link('http://example.com/foo', [['rel', '"quoted"'], ['type', 'text/html'], ['title*', "UTF-8'en'%e2%82%ac%20rates"]]))
        '<http://example.com/foo>; rel="\\\\"quoted\\\\""; type=text/html; title*=UTF-8\\'en\\'%e2%82%ac%20rates'

        Note that there is no explicit support for the title* attribute other
        than to output it unquoted. Where used, it is up to client applications to
        provide values that meet RFC2231 Section 7.
        """

        def str_pair(key, value):
            if value is None:
                # Attribute without a value: emit the bare name.
                return key
            elif RE_ONLY_TOKEN.match(value) or key.endswith("*"):
                # Plain tokens and "*" (extended) attributes go out unquoted.
                return "%s=%s" % (key, value)
            else:
                return '%s="%s"' % (key, value.replace('"', r"\""))

        return "; ".join(
            ["<%s>" % self.href]
            + [str_pair(key, value) for key, value in self.attr_pairs]
        )

    def __getattr__(self, name):
        """Look up link attributes by (case-insensitive) name.

        Names listed in SINGLE_VALUED_ATTRS yield the first value, or raise
        AttributeError when absent; any other name yields the list of all
        values (possibly empty).

        >>> Link('/', rel='self').rel
        'self'
        >>> Link('/', rel='self').REL
        'self'
        >>> Link('/', hreflang='EN').hreflang
        ['EN']
        >>> Link('/', foo='bar').foo
        ['bar']
        >>> Link('/', [('foo', 'bar'), ('foo', 'baz')]).foo
        ['bar', 'baz']
        >>> Link('/').rel #doctest: +ELLIPSIS
        Traceback (most recent call last):
            ...
        AttributeError: No attribute of type 'rel' present
        >>> Link('/').hreflang
        []
        >>> Link('/').foo
        []

        Special (double-underscore) names are never treated as link
        attributes, so protocol probes behave normally:

        >>> import copy
        >>> copy.copy(Link('/', rel='self'))
        Link('/', rel='self')
        """
        # Dunder lookups that reach __getattr__ are protocol probes (copy,
        # pickle, ...); answering them with a list would confuse the caller.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        # Go through __dict__ directly: on an instance whose attr_pairs has
        # not been set (e.g. one being rebuilt by copy or pickle), reading
        # self.attr_pairs would re-enter __getattr__ and recurse without end.
        try:
            attr_pairs = self.__dict__["attr_pairs"]
        except KeyError:
            raise AttributeError(name) from None

        name_lower = name.lower()
        values = [value for key, value in attr_pairs if key.lower() == name_lower]
        # Use the lowered name here too, so that the single-valued decision
        # is as case-insensitive as the matching above.
        if name_lower in SINGLE_VALUED_ATTRS:
            if values:
                return values[0]
            raise AttributeError("No attribute of type %r present" % name_lower)
        return values

    def __contains__(self, name):
        """
        >>> 'rel' in Link('/', rel='self')
        True
        >>> 'obs' in Link('/', obs=None)
        True
        >>> 'rel' in Link('/')
        False
        """
        name_lower = name.lower()
        return any(key.lower() == name_lower for key, _ in self.attr_pairs)

    def get_context(self, requested_resource_address):
        """Return the absolute URI of the context of a link. This usually
        equals the base address the statement is about (eg. the requested URL
        if the link header was served in a successful HTTP GET request), but
        can be overridden by the anchor parameter.

        >>> Link('../', rel='index').get_context('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/chapter1/'
        >>> Link('', rel='next', anchor='../').get_context('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/'
        """
        if "anchor" in self:
            return urljoin(requested_resource_address, self.anchor)
        return requested_resource_address

    def get_target(self, requested_resource_address):
        """Return the absolute URI of the target of a link. It is determined by
        joining the address from which the link header was retrieved with the
        link-value (inside angular brackets) according to RFC3986 section 5.

        >>> Link('../', rel='index').get_target('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/'
        >>> Link('', rel='next', anchor='../').get_target('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/chapter1/'
        """
        return urljoin(requested_resource_address, self.href)
class _Scanner(object):
    """Consumes a string from the front, one regex match at a time.

    `buf` holds the text not yet consumed; `match` holds the result of the
    most recent scan() and is what indexing reads groups from.
    """

    def __init__(self, buf):
        self.match = None
        self.buf = buf

    def __getitem__(self, key):
        # Group `key` of the most recent match.
        last_match = self.match
        return last_match.group(key)

    def scan(self, pattern):
        """Match `pattern` at the start of the remaining text.

        On success the matched prefix is dropped from `buf`. The match
        object (or None) is stored in `match` and returned.
        """
        remaining = self.buf
        found = pattern.match(remaining)
        self.match = found
        if found:
            self.buf = remaining[found.end() :]
        return found
# Stand-in headers mapping referenced by the doctest examples in the module
# docstring.
headers: Dict[str, str] = dict()