lychee_lib/ratelimit/
pool.rs

1use dashmap::DashMap;
2use http::Method;
3use reqwest::{Client, Request};
4use std::collections::HashMap;
5use std::sync::Arc;
6
7use crate::ratelimit::{
8    CacheableResponse, Host, HostConfigs, HostKey, HostStats, HostStatsMap, RateLimitConfig,
9};
10use crate::types::Result;
11use crate::{ErrorKind, Uri};
12
13/// Keep track of host-specific [`reqwest::Client`]s
14pub type ClientMap = HashMap<HostKey, reqwest::Client>;
15
16/// Manages a pool of Host instances and routes requests to appropriate hosts.
17///
18/// The `HostPool` serves as the central coordinator for per-host rate limiting.
19/// It creates host instances on-demand and provides a unified interface for
20/// executing HTTP requests with appropriate rate limiting applied.
21///
22/// # Architecture
23///
24/// - Each unique hostname gets its own Host instance with dedicated rate limiting
25/// - Hosts are created lazily when first requested
26/// - Thread-safe using `DashMap` for concurrent access to host instances
27#[derive(Debug)]
28pub struct HostPool {
29    /// Map of hostname to Host instances, created on-demand
30    hosts: DashMap<HostKey, Arc<Host>>,
31
32    /// Global configuration for rate limiting defaults
33    global_config: RateLimitConfig,
34
35    /// Per-host configuration overrides
36    host_configs: HostConfigs,
37
38    /// Fallback client for hosts without host-specific client
39    default_client: Client,
40
41    /// Host-specific clients
42    client_map: ClientMap,
43}
44
45impl HostPool {
46    /// Create a new `HostPool` with the given configuration
47    #[must_use]
48    pub fn new(
49        global_config: RateLimitConfig,
50        host_configs: HostConfigs,
51        default_client: Client,
52        client_map: ClientMap,
53    ) -> Self {
54        Self {
55            hosts: DashMap::new(),
56            global_config,
57            host_configs,
58            default_client,
59            client_map,
60        }
61    }
62
63    /// Try to execute a [`Request`] with appropriate per-host rate limiting.
64    ///
65    /// # Errors
66    ///
67    /// Fails if:
68    /// - The request URL has no valid hostname
69    /// - The underlying HTTP request fails
70    pub(crate) async fn execute_request(&self, request: Request) -> Result<CacheableResponse> {
71        let url = request.url();
72        let host_key = HostKey::try_from(url)?;
73        let host = self.get_or_create_host(host_key);
74        host.execute_request(request).await
75    }
76
77    /// Try to build a [`Request`]
78    ///
79    /// # Errors
80    ///
81    /// Fails if:
82    /// - The request URI has no valid hostname
83    /// - The request fails to build
84    pub fn build_request(&self, method: Method, uri: &Uri) -> Result<Request> {
85        let host_key = HostKey::try_from(uri)?;
86        let host = self.get_or_create_host(host_key);
87        host.get_client()
88            .request(method, uri.url.clone())
89            .build()
90            .map_err(ErrorKind::BuildRequestClient)
91    }
92
93    /// Get an existing host or create a new one for the given hostname
94    fn get_or_create_host(&self, host_key: HostKey) -> Arc<Host> {
95        self.hosts
96            .entry(host_key.clone())
97            .or_insert_with(|| {
98                let host_config = self
99                    .host_configs
100                    .get(&host_key)
101                    .cloned()
102                    .unwrap_or_default();
103
104                let client = self
105                    .client_map
106                    .get(&host_key)
107                    .unwrap_or(&self.default_client)
108                    .clone();
109
110                Arc::new(Host::new(
111                    host_key,
112                    &host_config,
113                    &self.global_config,
114                    client,
115                ))
116            })
117            .value()
118            .clone()
119    }
120
121    /// Returns statistics for the host if it exists, otherwise returns empty stats.
122    /// This provides consistent behavior whether or not requests have been made to that host yet.
123    #[must_use]
124    pub fn host_stats(&self, hostname: &str) -> HostStats {
125        let host_key = HostKey::from(hostname);
126        self.hosts
127            .get(&host_key)
128            .map(|host| host.stats())
129            .unwrap_or_default()
130    }
131
132    /// Returns a `HashMap` mapping hostnames to their statistics.
133    /// Only hosts that have had requests will be included.
134    #[must_use]
135    pub fn all_host_stats(&self) -> HostStatsMap {
136        HostStatsMap::from(
137            self.hosts
138                .iter()
139                .map(|entry| {
140                    let hostname = entry.key().to_string();
141                    let stats = entry.value().stats();
142                    (hostname, stats)
143                })
144                .collect::<HashMap<_, _>>(),
145        )
146    }
147
148    /// Get the number of host instances that have been created,
149    /// which corresponds to the number of unique hostnames that have
150    /// been accessed.
151    #[must_use]
152    pub fn active_host_count(&self) -> usize {
153        self.hosts.len()
154    }
155
156    /// Get  a copy of the current host-specific configurations.
157    /// This is useful for debugging or runtime monitoring of configuration.
158    #[must_use]
159    pub fn host_configurations(&self) -> HostConfigs {
160        self.host_configs.clone()
161    }
162
163    /// Remove a host from the pool.
164    ///
165    /// This forces the host to be recreated with updated configuration
166    /// the next time a request is made to it. Any ongoing requests to
167    /// that host will continue with the old instance.
168    ///
169    /// # Returns
170    ///
171    /// Returns true if a host was removed, false if no host existed for that hostname.
172    #[must_use]
173    pub fn remove_host(&self, hostname: &str) -> bool {
174        let host_key = HostKey::from(hostname);
175        self.hosts.remove(&host_key).is_some()
176    }
177
178    /// Get cache statistics across all hosts
179    #[must_use]
180    pub fn cache_stats(&self) -> HashMap<String, (usize, f64)> {
181        self.hosts
182            .iter()
183            .map(|entry| {
184                let hostname = entry.key().to_string();
185                let cache_size = entry.value().cache_size();
186                let hit_rate = entry.value().stats().cache_hit_rate();
187                (hostname, (cache_size, hit_rate))
188            })
189            .collect()
190    }
191
192    /// Record a cache hit for the given URI in host statistics.
193    /// This tracks that a request was served from the persistent disk cache.
194    /// Note that no equivalent function for tracking cache misses is exposed,
195    /// since this is handled internally.
196    pub fn record_persistent_cache_hit(&self, uri: &crate::Uri) {
197        if !uri.is_file() && !uri.is_mail() {
198            match crate::ratelimit::HostKey::try_from(uri) {
199                Ok(key) => {
200                    let host = self.get_or_create_host(key);
201                    host.record_persistent_cache_hit();
202                }
203                Err(e) => {
204                    log::debug!("Failed to record cache hit for {uri}: {e}");
205                }
206            }
207        }
208    }
209}
210
211impl Default for HostPool {
212    fn default() -> Self {
213        Self::new(
214            RateLimitConfig::default(),
215            HostConfigs::default(),
216            Client::default(),
217            HashMap::new(),
218        )
219    }
220}
221
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ratelimit::RateLimitConfig;

    use url::Url;

    /// A pool built through `new` starts without any hosts.
    #[test]
    fn test_host_pool_creation() {
        let pool = HostPool::new(
            RateLimitConfig::default(),
            HostConfigs::default(),
            Client::default(),
            HashMap::new(),
        );

        assert_eq!(pool.active_host_count(), 0);
    }

    /// A pool built through `Default` starts without any hosts.
    #[test]
    fn test_host_pool_default() {
        let pool = HostPool::default();
        assert_eq!(pool.active_host_count(), 0);
    }

    /// Hosts only show up in the pool once they are requested.
    #[tokio::test]
    async fn test_host_creation_on_demand() {
        let pool = HostPool::default();
        let url: Url = "https://example.com/path".parse().unwrap();
        let host_key = HostKey::try_from(&url).unwrap();

        // No hosts initially
        assert_eq!(pool.active_host_count(), 0);
        assert_eq!(pool.host_stats("example.com").total_requests, 0);

        // Create host on demand
        let host = pool.get_or_create_host(host_key);

        // Now we have one host
        assert_eq!(pool.active_host_count(), 1);
        assert_eq!(pool.host_stats("example.com").total_requests, 0);
        assert_eq!(host.key.as_str(), "example.com");
    }

    /// Two URLs on the same host resolve to the very same `Host` instance.
    #[tokio::test]
    async fn test_host_reuse() {
        let pool = HostPool::default();
        let url: Url = "https://example.com/path1".parse().unwrap();
        let host_key1 = HostKey::try_from(&url).unwrap();

        let url: Url = "https://example.com/path2".parse().unwrap();
        let host_key2 = HostKey::try_from(&url).unwrap();

        // Create host for first request
        let host1 = pool.get_or_create_host(host_key1);
        assert_eq!(pool.active_host_count(), 1);

        // Second request to same host should reuse
        let host2 = pool.get_or_create_host(host_key2);
        assert_eq!(pool.active_host_count(), 1);

        // Should be the same instance
        assert!(Arc::ptr_eq(&host1, &host2));
    }

    /// A default pool carries no host-specific configuration.
    #[test]
    fn test_host_config_management() {
        let pool = HostPool::default();

        // Initially no host configurations
        let configs = pool.host_configurations();
        assert_eq!(configs.len(), 0);
    }

    /// Removal reports whether a host was actually dropped from the pool.
    #[tokio::test]
    async fn test_host_removal() {
        let pool = HostPool::default();

        // Remove non-existent host
        assert!(!pool.remove_host("nonexistent.com"));

        // Host creation is synchronous, so an existing host can be set up
        // without sending any request.
        let url: Url = "https://example.com/path".parse().unwrap();
        let host_key = HostKey::try_from(&url).unwrap();
        let _host = pool.get_or_create_host(host_key);
        assert_eq!(pool.active_host_count(), 1);

        // Remove existing host
        assert!(pool.remove_host("example.com"));
        assert_eq!(pool.active_host_count(), 0);

        // Nothing is left to remove the second time
        assert!(!pool.remove_host("example.com"));
    }
}