Coverage for aiocoap/util/vendored/link

1# SPDX-FileCopyrightText: Michael Burrows <mjb@asplake.co.uk>

3# SPDX-License-Identifier: BSD-3-Clause

5"""

6Parse and format link headers according to RFC 5988 "Web Linking".

8Usage (assuming a suitable headers object in the environment):

10>>> headers['Link'] = str(LinkHeader([Link("http://example.com/foo", rel="self"),

11... Link("http://example.com", rel="up")]))

12>>> headers['Link']

13'<http://example.com/foo>; rel=self, <http://example.com>; rel=up'

14>>> parse(headers['Link'])

15LinkHeader([Link('http://example.com/foo', rel='self'), Link('http://example.com', rel='up')])

17Blank and missing values roundtrip correctly:

19>>> format_link(parse('</s/1>; obs; if="core.s"; foo=""'))

20'<</s/1>; obs; if=core.s; foo="">'

22Conversions to and from json-friendly list-based structures are also provided:

24>>> parse(headers['Link']).to_py()

25[['http://example.com/foo', [['rel', 'self']]], ['http://example.com', [['rel', 'up']]]]

26>>> str(LinkHeader([['http://example.com/foo', [['rel', 'self']]],

27... ['http://example.com', [['rel', 'up']]]]))

28'<http://example.com/foo>; rel=self, <http://example.com>; rel=up'

30For further information see parse(), LinkHeader and Link.

31"""

33import re

34from typing import Dict

35from urllib.parse import urljoin

37__all__ = [

38 "parse",

39 "format_links",

40 "format_link",

41 "LinkHeader",

42 "Link",

43 "ParseException",

44]

# Attribute names for which Link.__getattr__ yields one value instead of a list.
SINGLE_VALUED_ATTRS = ["rel", "anchor", "rev", "media", "title", "title*", "type"]

#
# Regular expressions used when parsing link headers. TOKEN and QUOTED in
# particular are meant to conform to RFC2616.
#
# Acknowledgement: The QUOTED regexp is based on
# http://stackoverflow.com/questions/249791/regexp-for-quoted-string-with-escaping-quotes/249937#249937
#
# Each compiled pattern consumes trailing spaces. RE_COMMA_HREF additionally
# accepts leading spaces and an optional leading ',' separator.
#

# double-quoted strings with backslash-escaped double quotes
QUOTED = r'"((?:[^"\\]|\\.)*)"'

# non-empty sequence of non-separator characters
TOKEN = r"([^()<>@,;:\"\[\]?={}\s]+)"

# '<href>' with its optional ',' separator; no attempt to check URI validity
RE_COMMA_HREF = re.compile(r" *,? *< *([^>]*) *> *")

# A string that consists of exactly one TOKEN.
RE_ONLY_TOKEN = re.compile(r"^%(TOKEN)s$" % {"TOKEN": TOKEN})

# 'name', 'name=token' or 'name="quoted"'; group 1 is the name, group 3 the
# bare token value and group 4 the contents of the quoted value.
RE_ATTR = re.compile(
    r"%(TOKEN)s *(?:= *(%(TOKEN)s|%(QUOTED)s))? *" % {"TOKEN": TOKEN, "QUOTED": QUOTED}
)

RE_SEMI = re.compile(r"; *")
RE_COMMA = re.compile(r", *")

def parse(header):
    """Parse a link header string, returning a LinkHeader object:

    >>> parse('<http://example.com/foo>; rel="foo bar", <http://example.com>; rel=up; type=text/html')
    LinkHeader([Link('http://example.com/foo', rel='foo bar'), Link('http://example.com', rel='up', type='text/html')])

    ParseException is raised in the event that the input string is not parsed completely;
    its message quotes the part of the input that was left over:

    >>> parse('<http://example.com/foo> error') #doctest: +SKIP
    Traceback (most recent call last):
        ...
    ParseException: link_header.parse() failed near 'error'
    """
    scanner = _Scanner(header)
    links = []
    # Each iteration consumes one "<href>" (with its optional leading comma)
    # followed by any number of "; attribute" parts.
    while scanner.scan(RE_COMMA_HREF):
        href = scanner[1]
        attrs = []
        while scanner.scan(RE_SEMI):
            if scanner.scan(RE_ATTR):
                attr_name, token, quoted = scanner[1], scanner[3], scanner[4]
                if quoted is not None:
                    # Undo the backslash escaping of double quotes.
                    value = quoted.replace(r"\"", '"')
                else:
                    # Bare token value, or None for an attribute without "=value".
                    value = token
                attrs.append([attr_name, value])
        links.append(Link(href, attrs))

    if scanner.buf:
        # Interpolate the leftover input into the message; passing it as a
        # second exception argument would leave the "%s" unformatted.
        raise ParseException("link_header.parse() failed near %s" % repr(scanner.buf))

    return LinkHeader(links)

103

104

def format_links(*args, **kwargs):
    """Build a LinkHeader from the given arguments and return its string form."""
    header = LinkHeader(*args, **kwargs)
    return str(header)

107

108

def format_link(*args, **kwargs):
    """Build a Link from the given arguments and return its string form."""
    link = Link(*args, **kwargs)
    return str(link)

111

112

class ParseException(Exception):
    """Raised by parse() when the input string is not parsed completely."""

115

116

class LinkHeader(object):
    """A sequence of links that can be formatted together as one link header."""

    def __init__(self, links=None):
        """Set up the header from Link objects and/or from parameter lists
        out of which Link objects are built:

        >>> LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')])
        LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')])
        >>> LinkHeader([['http://example.com/foo', [['rel', 'foo']]], ['http://example.com', [['rel', 'up']]]])
        LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')])

        The resulting Link objects are available through the `links` attribute.

        String conversion follows the spec:

        >>> str(LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]))
        '<http://example.com/foo>; rel=foo, <http://example.com>; rel=up'

        Conversion to json-friendly list-based structures:

        >>> LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]).to_py()
        [['http://example.com/foo', [['rel', 'foo']]], ['http://example.com', [['rel', 'up']]]]

        """
        collected = []
        for entry in links or []:
            # Anything that is not already a Link is treated as Link's
            # positional arguments.
            if not isinstance(entry, Link):
                entry = Link(*entry)
            collected.append(entry)
        self.links = collected

    def to_py(self):
        """Return the list-based form of every link, in order:

        >>> LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]).to_py()
        [['http://example.com/foo', [['rel', 'foo']]], ['http://example.com', [['rel', 'up']]]]
        """
        converted = []
        for link in self.links:
            converted.append(link.to_py())
        return converted

    def __repr__(self):
        inner = ", ".join(map(repr, self.links))
        return "LinkHeader([%s])" % inner

    def __str__(self):
        """Format the links as a single link header value:

        >>> str(LinkHeader([Link('http://example.com/foo', rel='foo'), Link('http://example.com', rel='up')]))
        '<http://example.com/foo>; rel=foo, <http://example.com>; rel=up'
        """
        return ", ".join(map(str, self.links))

    def links_by_attr_pairs(self, pairs):
        """Return the links whose attribute pairs include every supplied pair:

        >>> parse('<http://example.com/foo>; rel="foo", <http://example.com>; rel="up"'
        ...      ).links_by_attr_pairs([('rel', 'up')])
        [Link('http://example.com', rel='up')]
        """

        def has_all_pairs(link):
            # Stop at the first requested pair the link does not carry.
            for key, value in pairs:
                if [key, value] not in link.attr_pairs:
                    return False
            return True

        return [link for link in self.links if has_all_pairs(link)]

178

179

class Link(object):
    """Represents a single link: an href plus an ordered list of attribute pairs."""

    def __init__(self, href, attr_pairs=None, **kwargs):
        """Initializes a Link object with an href and attributes either in
        the form of an iterable of key/value pairs &/or as keyword arguments.
        The pair form allows keys to be repeated. Attributes may be accessed
        subsequently via the `attr_pairs` attribute, a list of `[key, value]`
        lists in which the keyword arguments come last.

        String conversion follows the spec:

        >>> str(Link('http://example.com', [('foo', 'bar'), ('foo', 'baz')], rel='self'))
        '<http://example.com>; foo=bar; foo=baz; rel=self'

        Conversion to json-friendly list-based structures:

        >>> Link('http://example.com', [('foo', 'bar'), ('foo', 'baz')], rel='self').to_py()
        ['http://example.com', [['foo', 'bar'], ['foo', 'baz'], ['rel', 'self']]]
        """
        self.href = href
        # list(...) lets attr_pairs be any iterable (tuple, generator, ...),
        # not only a list that supports "+" with another list.
        self.attr_pairs = [
            list(pair) for pair in list(attr_pairs or []) + list(kwargs.items())
        ]

    def to_py(self):
        """Convert to a json-friendly list-based structure:

        >>> Link('http://example.com', rel='foo').to_py()
        ['http://example.com', [['rel', 'foo']]]
        """
        return [self.href, self.attr_pairs]

    def __repr__(self):
        """
        >>> Link('http://example.com', rel='self')
        Link('http://example.com', rel='self')
        """
        return "Link(%s)" % ", ".join(
            [repr(self.href)]
            + ["%s=%s" % (pair[0], repr(pair[1])) for pair in self.attr_pairs]
        )

    def __str__(self):
        """Formats a single link:

        >>> str(Link('http://example.com/foo', [['rel', 'self']]))
        '<http://example.com/foo>; rel=self'
        >>> str(Link('http://example.com/foo', [['rel', '"quoted"'], ['type', 'text/html'], ['title*', "UTF-8'en'%e2%82%ac%20rates"]]))
        '<http://example.com/foo>; rel="\\\\"quoted\\\\""; type=text/html; title*=UTF-8\\'en\\'%e2%82%ac%20rates'

        Note that there is no explicit support for the title* attribute other
        than to output it unquoted. Where used, it is up to client applications to
        provide values that meet RFC2231 Section 7.
        """

        def str_pair(key, value):
            if value is None:
                # Attribute without a value, e.g. "obs".
                return key
            elif RE_ONLY_TOKEN.match(value) or key.endswith("*"):
                # Plain tokens and "*"-suffixed attributes are written unquoted.
                return "%s=%s" % (key, value)
            else:
                # Everything else is double-quoted, escaping embedded quotes.
                return '%s="%s"' % (key, value.replace('"', r"\""))

        return "; ".join(
            ["<%s>" % self.href]
            + [str_pair(key, value) for key, value in self.attr_pairs]
        )

    def __getattr__(self, name):
        """Look up link attributes by (case-insensitive) name.

        Names listed in SINGLE_VALUED_ATTRS give the first value or raise
        AttributeError; every other name gives the (possibly empty) list of
        values.

        >>> Link('/', rel='self').rel
        'self'
        >>> Link('/', hreflang='EN').hreflang
        ['EN']
        >>> Link('/', foo='bar').foo
        ['bar']
        >>> Link('/', [('foo', 'bar'), ('foo', 'baz')]).foo
        ['bar', 'baz']
        >>> Link('/').rel #doctest: +ELLIPSIS
        Traceback (most recent call last):
            ...
        AttributeError: No attribute of type 'rel' present
        >>> Link('/').hreflang
        []
        >>> Link('/').foo
        []

        Double-underscore names are never treated as link attributes, so that
        protocol probes made by `copy` and `pickle` (`__deepcopy__`,
        `__setstate__`, ...) see a missing attribute instead of a list.
        """
        # Answering protocol probes with a list breaks copy.deepcopy (it would
        # try to call the list); report such names as absent instead.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        # Reached only when attr_pairs itself is not set (e.g. on an instance
        # created without __init__); reading self.attr_pairs below would
        # otherwise re-enter this method without end.
        if name == "attr_pairs":
            raise AttributeError(name)

        name_lower = name.lower()
        values = [value for key, value in self.attr_pairs if key.lower() == name_lower]
        if name in SINGLE_VALUED_ATTRS:
            if values:
                return values[0]
            else:
                raise AttributeError("No attribute of type %r present" % name_lower)
        return values

    def __contains__(self, name):
        """Tell whether an attribute of that (case-insensitive) name is present,
        with or without a value:

        >>> 'rel' in Link('/', rel='self')
        True
        >>> 'obs' in Link('/', obs=None)
        True
        >>> 'rel' in Link('/')
        False
        """
        name_lower = name.lower()
        return any(key.lower() == name_lower for key, value in self.attr_pairs)

    def get_context(self, requested_resource_address):
        """Return the absolute URI of the context of a link. This usually
        equals the base address the statement is about (eg. the requested URL
        if the link header was served in a successful HTTP GET request), but
        can be overridden by the anchor parameter.

        >>> Link('../', rel='index').get_context('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/chapter1/'
        >>> Link('', rel='next', anchor='../').get_context('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/'
        """
        if "anchor" in self:
            return urljoin(requested_resource_address, self.anchor)
        return requested_resource_address

    def get_target(self, requested_resource_address):
        """Return the absolute URI of the target of a link. It is determined by
        joining the address from which the link header was retrieved with the
        link-value (inside angular brackets) according to RFC3986 section 5.

        >>> Link('../', rel='index').get_target('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/'
        >>> Link('', rel='next', anchor='../').get_target('http://www.example.com/book1/chapter1/')
        'http://www.example.com/book1/chapter1/'
        """
        return urljoin(requested_resource_address, self.href)

314

315

class _Scanner(object):
    """Consumes a string from the front, one regular expression match at a time."""

    def __init__(self, buf):
        # Input that has not been consumed yet.
        self.buf = buf
        # Result of the most recent scan() call (None before the first call
        # and after a failed one).
        self.match = None

    def __getitem__(self, key):
        """Return group `key` of the most recent match."""
        return self.match.group(key)

    def scan(self, pattern):
        """Match `pattern` at the start of the remaining input.

        On success the matched text is dropped from `buf`. The match object
        (or None) is stored in `match` and returned.
        """
        found = pattern.match(self.buf)
        self.match = found
        if found is not None:
            self.buf = self.buf[found.end():]
        return found

329

330

# Stand-in headers mapping that the doctests in the module docstring assign to
# and read from.
headers: Dict[str, str] = dict()

Coverage for aiocoap/util/vendored/link_header.py: 99%

92 statements