mirror of https://github.com/stjepang/smol
Merge pull request #178 from Writtic/fix-web-crawler
Change piper to async-channel on web-crawler example
This commit is contained in:
commit
84bbdd03cd
|
@ -18,6 +18,7 @@ anyhow = "1.0.28"
|
||||||
async-h1 = "1.1.2"
|
async-h1 = "1.1.2"
|
||||||
async-native-tls = "0.3.3"
|
async-native-tls = "0.3.3"
|
||||||
async-std = "1.5.0"
|
async-std = "1.5.0"
|
||||||
|
async-channel = "1.1.1"
|
||||||
async-tungstenite = { version = "0.4.2", features = ["async-native-tls"] }
|
async-tungstenite = { version = "0.4.2", features = ["async-native-tls"] }
|
||||||
base64 = "0.12.0"
|
base64 = "0.12.0"
|
||||||
ctrlc = "3.1.4"
|
ctrlc = "3.1.4"
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
use std::collections::{HashSet, VecDeque};
|
use std::collections::{HashSet, VecDeque};
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use piper::Sender;
|
use async_channel::Sender;
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
use smol::Task;
|
use smol::Task;
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ const ROOT: &str = "https://www.rust-lang.org";
|
||||||
async fn fetch(url: String, sender: Sender<String>) {
|
async fn fetch(url: String, sender: Sender<String>) {
|
||||||
let body = surf::get(&url).recv_string().await;
|
let body = surf::get(&url).recv_string().await;
|
||||||
let body = body.unwrap_or_default();
|
let body = body.unwrap_or_default();
|
||||||
sender.send(body).await;
|
sender.send(body).await.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extracts links from a HTML body.
|
/// Extracts links from a HTML body.
|
||||||
|
@ -41,13 +41,13 @@ fn main() -> Result<()> {
|
||||||
seen.insert(ROOT.to_string());
|
seen.insert(ROOT.to_string());
|
||||||
queue.push_back(ROOT.to_string());
|
queue.push_back(ROOT.to_string());
|
||||||
|
|
||||||
let (s, r) = piper::chan(200);
|
let (s, r) = async_channel::bounded(200);
|
||||||
let mut tasks = 0;
|
let mut tasks = 0;
|
||||||
|
|
||||||
// Loop while the queue is not empty or tasks are fetching pages.
|
// Loop while the queue is not empty or tasks are fetching pages.
|
||||||
while queue.len() + tasks > 0 {
|
while queue.len() + tasks > 0 {
|
||||||
// Limit the number of concurrent tasks.
|
// Limit the number of concurrent tasks.
|
||||||
while tasks < s.capacity() {
|
while tasks < s.capacity().unwrap() {
|
||||||
// Process URLs in the queue and fetch more pages.
|
// Process URLs in the queue and fetch more pages.
|
||||||
match queue.pop_front() {
|
match queue.pop_front() {
|
||||||
None => break,
|
None => break,
|
||||||
|
|
Loading…
Reference in New Issue