1use reqwest::Url;
2use std::collections::HashSet;
3use std::path::Path;
4
5use crate::{
6 BaseInfo, BasicAuthCredentials, LycheeResult, Request, RequestError, Uri,
7 basic_auth::BasicAuthExtractor,
8 types::{ResolvedInputSource, uri::raw::RawUri},
9};
10
11pub(crate) fn extract_credentials(
13 extractor: Option<&BasicAuthExtractor>,
14 uri: &Uri,
15) -> Option<BasicAuthCredentials> {
16 extractor.as_ref().and_then(|ext| ext.matches(uri))
17}
18
19fn create_request(
21 raw_uri: &RawUri,
22 source: &ResolvedInputSource,
23 root_dir: Option<&Path>,
24 base: &BaseInfo,
25 extractor: Option<&BasicAuthExtractor>,
26) -> LycheeResult<Request> {
27 let uri = try_parse_into_uri(raw_uri, root_dir, base)?;
28 let source = source.clone();
29 let element = raw_uri.element.clone();
30 let attribute = raw_uri.attribute.clone();
31 let credentials = extract_credentials(extractor, &uri);
32
33 Ok(Request::new(uri, source, element, attribute, credentials))
34}
35
36fn try_parse_into_uri(
48 raw_uri: &RawUri,
49 root_dir: Option<&Path>,
50 base: &BaseInfo,
51) -> LycheeResult<Uri> {
52 let root_dir = root_dir.and_then(|x| Url::from_directory_path(x).ok());
54 Ok(base
55 .parse_url_text_with_root_dir(&raw_uri.text, root_dir.as_ref())?
56 .into())
57}
58
59pub(crate) fn create(
66 uris: Vec<RawUri>,
67 source: &ResolvedInputSource,
68 root_dir: Option<&Path>,
69 fallback_base: &BaseInfo,
70 extractor: Option<&BasicAuthExtractor>,
71) -> Vec<Result<Request, RequestError>> {
72 let source_base = match source.to_url() {
73 Ok(None) => BaseInfo::none(),
74 Ok(Some(url)) => BaseInfo::from_source_url(&url),
75 Err(e) => {
76 return vec![Err(RequestError::GetInputContent(source.clone().into(), e))];
78 }
79 };
80
81 let fallback_base = fallback_base.use_fs_root_as_origin();
85 let base = source_base.or_fallback(&fallback_base);
86
87 let mut requests = HashSet::<Request>::new();
88 let mut errors = Vec::<RequestError>::new();
89
90 for raw_uri in uris {
91 let result = create_request(&raw_uri, source, root_dir, base, extractor);
92 match result {
93 Ok(request) => {
94 requests.insert(request);
95 }
96 Err(e) => errors.push(RequestError::CreateRequestItem(
97 raw_uri.clone(),
98 source.clone(),
99 e,
100 )),
101 }
102 }
103
104 (requests.into_iter().map(Result::Ok))
105 .chain(errors.into_iter().map(Result::Err))
106 .collect()
107}
108
#[cfg(test)]
mod tests {
    use std::borrow::Cow;
    use std::num::NonZeroUsize;
    use std::path::PathBuf;

    use crate::Request;
    use crate::types::uri::raw::{RawUri, RawUriSpan};

    use super::*;

    /// URL used as the explicit base in the tests that supply one.
    const EXAMPLE_BASE: &str = "https://example.com/path/page.html";

    /// Directory used as `root_dir` in the `*_from_root_dir*` tests.
    const ROOT_DIR: &str = "/tmp/lychee";

    /// Runs [`create`] and keeps only the successfully created requests,
    /// discarding every error.
    fn create_ok_only(
        uris: Vec<RawUri>,
        source: &ResolvedInputSource,
        root_dir: Option<&Path>,
        base: &BaseInfo,
        extractor: Option<&BasicAuthExtractor>,
    ) -> Vec<Request> {
        create(uris, source, root_dir, base, extractor)
            .into_iter()
            .filter_map(Result::ok)
            .collect()
    }

    /// Builds a [`RawUri`] holding `text`, with no element/attribute and a
    /// placeholder span.
    fn raw_uri(text: &str) -> RawUri {
        RawUri {
            text: text.to_string(),
            element: None,
            attribute: None,
            span: RawUriSpan {
                line: NonZeroUsize::MAX,
                column: None,
            },
        }
    }

    /// Base built from [`EXAMPLE_BASE`].
    fn example_base() -> BaseInfo {
        BaseInfo::try_from(EXAMPLE_BASE).unwrap()
    }

    /// Input source backed by an empty in-memory string.
    fn string_source() -> ResolvedInputSource {
        ResolvedInputSource::String(Cow::Borrowed(""))
    }

    /// Input source pointing at the file `/some/page.html`.
    fn fs_source() -> ResolvedInputSource {
        ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"))
    }

    /// Asserts that `requests` contains exactly one request and that its URL
    /// equals `expected`.
    #[track_caller]
    fn assert_single_url(requests: &[Request], expected: &str) {
        assert_eq!(
            requests.len(),
            1,
            "expected exactly one request, got {requests:?}"
        );
        assert!(
            requests.iter().any(|r| r.uri.url.as_str() == expected),
            "expected a request for {expected}, got {requests:?}"
        );
    }

    // --- explicit base URL, in-memory source, no root dir -----------------

    /// A relative link resolves against the directory of the base URL.
    #[test]
    fn test_relative_url_resolution() {
        let uris = vec![raw_uri("relative.html")];
        let requests = create_ok_only(uris, &string_source(), None, &example_base(), None);

        assert_single_url(&requests, "https://example.com/path/relative.html");
    }

    /// An absolute URL is kept as is, regardless of the base.
    #[test]
    fn test_absolute_url_resolution() {
        let uris = vec![raw_uri("https://another.com/page")];
        let requests = create_ok_only(uris, &string_source(), None, &example_base(), None);

        assert_single_url(&requests, "https://another.com/page");
    }

    /// A root-relative link resolves against the origin of the base URL.
    #[test]
    fn test_root_relative_url_resolution() {
        let uris = vec![raw_uri("/root-relative")];
        let requests = create_ok_only(uris, &string_source(), None, &example_base(), None);

        assert_single_url(&requests, "https://example.com/root-relative");
    }

    /// A `../` link climbs one directory relative to the base URL.
    #[test]
    fn test_parent_directory_url_resolution() {
        let uris = vec![raw_uri("../parent")];
        let requests = create_ok_only(uris, &string_source(), None, &example_base(), None);

        assert_single_url(&requests, "https://example.com/parent");
    }

    /// A fragment-only link is appended to the base URL itself.
    #[test]
    fn test_fragment_url_resolution() {
        let uris = vec![raw_uri("#fragment")];
        let requests = create_ok_only(uris, &string_source(), None, &example_base(), None);

        assert_single_url(&requests, "https://example.com/path/page.html#fragment");
    }

    // --- root dir only, file source, no base ------------------------------

    /// A relative link resolves next to the source file, not the root dir.
    #[test]
    fn test_relative_url_resolution_from_root_dir() {
        let root_dir = PathBuf::from(ROOT_DIR);

        let uris = vec![raw_uri("relative.html")];
        let requests = create_ok_only(
            uris,
            &fs_source(),
            Some(&root_dir),
            &BaseInfo::none(),
            None,
        );

        assert_single_url(&requests, "file:///some/relative.html");
    }

    /// An absolute URL is unaffected by the root dir.
    #[test]
    fn test_absolute_url_resolution_from_root_dir() {
        let root_dir = PathBuf::from(ROOT_DIR);

        let uris = vec![raw_uri("https://another.com/page")];
        let requests = create_ok_only(
            uris,
            &fs_source(),
            Some(&root_dir),
            &BaseInfo::none(),
            None,
        );

        assert_single_url(&requests, "https://another.com/page");
    }

    /// A root-relative link resolves inside the root dir.
    #[test]
    fn test_root_relative_url_resolution_from_root_dir() {
        let root_dir = PathBuf::from(ROOT_DIR);

        let uris = vec![raw_uri("/root-relative")];
        let requests = create_ok_only(
            uris,
            &fs_source(),
            Some(&root_dir),
            &BaseInfo::none(),
            None,
        );

        assert_single_url(&requests, "file:///tmp/lychee/root-relative");
    }

    /// A `../` link climbs relative to the source file's directory.
    #[test]
    fn test_parent_directory_url_resolution_from_root_dir() {
        let root_dir = PathBuf::from(ROOT_DIR);

        let uris = vec![raw_uri("../parent")];
        let requests = create_ok_only(
            uris,
            &fs_source(),
            Some(&root_dir),
            &BaseInfo::none(),
            None,
        );

        assert_single_url(&requests, "file:///parent");
    }

    /// A fragment-only link is appended to the source file's URL.
    #[test]
    fn test_fragment_url_resolution_from_root_dir() {
        let root_dir = PathBuf::from(ROOT_DIR);

        let uris = vec![raw_uri("#fragment")];
        let requests = create_ok_only(
            uris,
            &fs_source(),
            Some(&root_dir),
            &BaseInfo::none(),
            None,
        );

        assert_single_url(&requests, "file:///some/page.html#fragment");
    }

    // --- root dir and explicit base URL, file source ----------------------

    /// With both a root dir and a base URL, a relative link resolves against
    /// the base URL.
    #[test]
    fn test_relative_url_resolution_from_root_dir_and_base_url() {
        let root_dir = PathBuf::from(ROOT_DIR);

        let uris = vec![raw_uri("relative.html")];
        let requests = create_ok_only(uris, &fs_source(), Some(&root_dir), &example_base(), None);

        assert_single_url(&requests, "https://example.com/path/relative.html");
    }

    /// With both a root dir and a base URL, an absolute URL is kept as is.
    #[test]
    fn test_absolute_url_resolution_from_root_dir_and_base_url() {
        let root_dir = PathBuf::from(ROOT_DIR);

        let uris = vec![raw_uri("https://another.com/page")];
        let requests = create_ok_only(uris, &fs_source(), Some(&root_dir), &example_base(), None);

        assert_single_url(&requests, "https://another.com/page");
    }

    /// With both a root dir and a base URL, a root-relative link resolves
    /// against the origin of the base URL.
    #[test]
    fn test_root_relative_url_resolution_from_root_dir_and_base_url() {
        let root_dir = PathBuf::from(ROOT_DIR);

        let uris = vec![raw_uri("/root-relative")];
        let requests = create_ok_only(uris, &fs_source(), Some(&root_dir), &example_base(), None);

        assert_single_url(&requests, "https://example.com/root-relative");
    }

    /// With both a root dir and a base URL, a `../` link climbs relative to
    /// the base URL.
    #[test]
    fn test_parent_directory_url_resolution_from_root_dir_and_base_url() {
        let root_dir = PathBuf::from(ROOT_DIR);

        let uris = vec![raw_uri("../parent")];
        let requests = create_ok_only(uris, &fs_source(), Some(&root_dir), &example_base(), None);

        assert_single_url(&requests, "https://example.com/parent");
    }

    /// With both a root dir and a base URL, a fragment-only link is appended
    /// to the base URL.
    #[test]
    fn test_fragment_url_resolution_from_root_dir_and_base_url() {
        let root_dir = PathBuf::from(ROOT_DIR);

        let uris = vec![raw_uri("#fragment")];
        let requests = create_ok_only(uris, &fs_source(), Some(&root_dir), &example_base(), None);

        assert_single_url(&requests, "https://example.com/path/page.html#fragment");
    }

    // --- no base at all ---------------------------------------------------

    /// An absolute URL needs neither a base nor a root dir.
    #[test]
    fn test_no_base_url_resolution() {
        let uris = vec![raw_uri("https://example.com/page")];
        let requests = create_ok_only(uris, &string_source(), None, &BaseInfo::none(), None);

        assert_single_url(&requests, "https://example.com/page");
    }

    // --- `create_request` directly ----------------------------------------

    /// A relative file link is resolved inside the directory given as base.
    #[test]
    fn test_create_request_from_relative_file_path() {
        let base = BaseInfo::from_path(&PathBuf::from(ROOT_DIR)).unwrap();
        let input_source = ResolvedInputSource::FsPath(PathBuf::from("page.html"));

        let actual =
            create_request(&raw_uri("file.html"), &input_source, None, &base, None).unwrap();

        assert_eq!(
            actual,
            Request::new(
                Uri {
                    url: Url::from_file_path("/tmp/lychee/file.html").unwrap()
                },
                input_source,
                None,
                None,
                None,
            )
        );
    }

    /// Links that have nothing to resolve against must produce an error.
    #[test]
    fn test_create_request_from_relative_file_path_errors() {
        // A relative link with no base and no root dir cannot be resolved.
        assert!(
            create_request(
                &raw_uri("file.html"),
                &ResolvedInputSource::Stdin,
                None,
                &BaseInfo::none(),
                None,
            )
            .is_err()
        );

        // A root-relative link with no base and no root dir cannot be
        // resolved either.
        assert!(
            create_request(
                &raw_uri("/file.html"),
                &ResolvedInputSource::FsPath(PathBuf::from("page.html")),
                None,
                &BaseInfo::none(),
                None,
            )
            .is_err()
        );
    }

    /// An absolute file link is re-rooted inside the directory given as base.
    #[test]
    fn test_create_request_from_absolute_file_path() {
        let base = BaseInfo::from_path(&PathBuf::from(ROOT_DIR)).unwrap();
        let input_source = ResolvedInputSource::FsPath(PathBuf::from("/tmp/lychee/page.html"));

        let actual = create_request(
            &raw_uri("/usr/local/share/doc/example.html"),
            &input_source,
            None,
            &base,
            None,
        )
        .unwrap();

        assert_eq!(
            actual,
            Request::new(
                Uri {
                    url: Url::from_file_path("/tmp/lychee/usr/local/share/doc/example.html")
                        .unwrap()
                },
                input_source,
                None,
                None,
                None,
            )
        );
    }

    // --- `try_parse_into_uri` directly ------------------------------------

    /// A relative path is resolved inside the base directory.
    #[test]
    fn test_parse_relative_path_into_uri() {
        let base = BaseInfo::from_path(&PathBuf::from(ROOT_DIR)).unwrap();

        let raw = raw_uri("relative.html");
        let uri = try_parse_into_uri(&raw, None, &base).unwrap();

        assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html");
    }

    /// An absolute (root-relative) path is re-rooted inside the base
    /// directory.
    #[test]
    fn test_parse_absolute_path_into_uri() {
        let base = BaseInfo::from_path(&PathBuf::from(ROOT_DIR)).unwrap();

        // The leading slash is what makes this an absolute path; without it
        // this test would merely repeat the relative-path case above.
        let raw = raw_uri("/absolute.html");
        let uri = try_parse_into_uri(&raw, None, &base).unwrap();

        assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html");
    }
}