Skip to content

Commit

Permalink
Add web service
Browse files Browse the repository at this point in the history
  • Loading branch information
ellenhp committed Feb 5, 2024
1 parent 921eff3 commit 1df32b5
Show file tree
Hide file tree
Showing 12 changed files with 177 additions and 36 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[workspace]
resolver = "2"
members = [
"airmail", "airmail_common", "airmail_index", "airmail_parser", "parser_demo",
"airmail", "airmail_common", "airmail_index", "airmail_parser", "airmail_service", "parser_demo",
]
2 changes: 2 additions & 0 deletions airmail/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ tempfile = "3.9.0"
airmail_common = { path = "../airmail_common" }
log = "0.4.20"
airmail_parser = { path = "../airmail_parser" }
serde_json = "1"
serde = { version = "1", features = ["derive"] }
117 changes: 89 additions & 28 deletions airmail/src/index.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use airmail_parser::{component::QueryComponentType, query::QueryScenario};
use serde_json::Value;
use tantivy::{
collector::TopDocs,
directory::MmapDirectory,
Expand All @@ -18,6 +19,7 @@ pub const FIELD_ROAD: &str = "road";
pub const FIELD_UNIT: &str = "unit";
pub const FIELD_LOCALITY: &str = "locality";
pub const FIELD_REGION: &str = "region";
pub const FIELD_COUNTRY: &str = "country";
pub const FIELD_S2CELL: &str = "s2cell";
pub const FIELD_TAGS: &str = "tags";

Expand Down Expand Up @@ -67,14 +69,20 @@ impl AirmailIndex {
let _ = schema_builder.add_text_field(FIELD_UNIT, TEXT | STORED);
let _ = schema_builder.add_text_field(FIELD_LOCALITY, TEXT | STORED);
let _ = schema_builder.add_text_field(FIELD_REGION, TEXT | STORED);
let _ = schema_builder.add_text_field(FIELD_COUNTRY, TEXT | STORED);
let _ = schema_builder.add_u64_field(FIELD_S2CELL, INDEXED | STORED);
let _ = schema_builder.add_json_field(FIELD_TAGS, STORED);
schema_builder.build()
}

pub fn field_name(&self) -> tantivy::schema::Field {
self.tantivy_index.schema().get_field(FIELD_NAME).unwrap()
}

pub fn field_source(&self) -> tantivy::schema::Field {
self.tantivy_index.schema().get_field(FIELD_SOURCE).unwrap()
}

pub fn field_house_number(&self) -> tantivy::schema::Field {
self.tantivy_index
.schema()
Expand All @@ -97,21 +105,25 @@ impl AirmailIndex {
.unwrap()
}

pub fn field_region(&self) -> tantivy::schema::Field {
self.tantivy_index.schema().get_field(FIELD_REGION).unwrap()
}

pub fn field_country(&self) -> tantivy::schema::Field {
self.tantivy_index
.schema()
.get_field(FIELD_COUNTRY)
.unwrap()
}

pub fn field_s2cell(&self) -> tantivy::schema::Field {
self.tantivy_index.schema().get_field(FIELD_S2CELL).unwrap()
}

pub fn field_region(&self) -> tantivy::schema::Field {
self.tantivy_index.schema().get_field(FIELD_REGION).unwrap()
pub fn field_tags(&self) -> tantivy::schema::Field {
self.tantivy_index.schema().get_field(FIELD_TAGS).unwrap()
}

// pub fn field_country(&self) -> tantivy::schema::Field {
// self.tantivy_index
// .schema()
// .get_field(FIELD_COUNTRY)
// .unwrap()
// }

pub fn create(index_dir: &str) -> Result<Self, Box<dyn std::error::Error>> {
let schema = Self::schema();
let tantivy_index =
Expand All @@ -133,7 +145,10 @@ impl AirmailIndex {
Ok(writer)
}

pub fn search(&self, query: &QueryScenario) -> Result<Vec<String>, Box<dyn std::error::Error>> {
pub fn search(
&self,
query: &QueryScenario,
) -> Result<Vec<AirmailPoi>, Box<dyn std::error::Error>> {
let tantivy_reader = self.tantivy_index.reader()?;
let searcher = tantivy_reader.searcher();
let mut queries: Vec<Box<dyn Query>> = Vec::new();
Expand Down Expand Up @@ -193,12 +208,17 @@ impl AirmailIndex {
self.field_region(),
term_strs,
is_prefix,
1,
0, // Regions are often abbreviations, so no fuzzy matching.
)?);
}

QueryComponentType::CountryComponent => {
// No-op
queries.push(query_for_terms(
self.field_country(),
term_strs,
is_prefix,
0, // Countries are often abbreviations, so no fuzzy matching.
)?);
}

QueryComponentType::PlaceNameComponent => {
Expand Down Expand Up @@ -230,22 +250,30 @@ impl AirmailIndex {
let mut results = Vec::new();
for (_score, doc_address) in top_docs {
let doc = searcher.doc(doc_address)?;
let house_num: Vec<&str> = doc
.get_all(self.field_house_number())
.filter_map(|v| v.as_text())
.collect();
let road: Vec<&str> = doc
.get_all(self.field_road())
.filter_map(|v| v.as_text())
.collect();
let unit: Vec<&str> = doc
.get_all(self.field_unit())
.filter_map(|v| v.as_text())
.collect();
let house_num: Option<&str> = doc
.get_first(self.field_house_number())
.map(|v| v.as_text())
.flatten();
let road: Option<&str> = doc
.get_first(self.field_road())
.map(|v| v.as_text())
.flatten();
let unit: Option<&str> = doc
.get_first(self.field_unit())
.map(|v| v.as_text())
.flatten();
let locality: Vec<&str> = doc
.get_all(self.field_locality())
.filter_map(|v| v.as_text())
.collect();
let region: Option<&str> = doc
.get_first(self.field_region())
.map(|v| v.as_text())
.flatten();
let country: Option<&str> = doc
.get_first(self.field_country())
.map(|v| v.as_text())
.flatten();
let s2cell = doc
.get_first(self.field_s2cell())
.unwrap()
Expand All @@ -257,11 +285,34 @@ impl AirmailIndex {
.get_all(self.field_name())
.filter_map(|v| v.as_text())
.collect::<Vec<&str>>();
let tags: Vec<(String, String)> = doc
.get_first(self.field_tags())
.map(|v| v.as_json().unwrap())
.cloned()
.unwrap_or_default()
.iter()
.map(|(k, v)| (k.clone(), v.as_str().unwrap().to_string()))
.collect();

results.push(format!(
"names: {:?}, house_num: {:?}, road: {:?}, unit: {:?}, locality: {:?}, latlng: {:?}",
names, house_num, road, unit, locality, latlng
));
let mut poi = AirmailPoi::new(
names.iter().map(|s| s.to_string()).collect(),
doc.get_first(self.field_source())
.unwrap()
.as_text()
.unwrap()
.to_string(),
vec![], // FIXME
house_num.iter().map(|s| s.to_string()).collect(),
road.iter().map(|s| s.to_string()).collect(),
unit.iter().map(|s| s.to_string()).collect(),
latlng.lat.deg(),
latlng.lng.deg(),
tags,
)?;
poi.locality = locality.iter().map(|s| s.to_string()).collect();
poi.region = region.map(|s| s.to_string()).into_iter().collect();
poi.country = country.map(|s| s.to_string()).into_iter().collect();
results.push(poi);
}

Ok(results)
Expand Down Expand Up @@ -301,6 +352,16 @@ impl AirmailIndexWriter {
for region in poi.region {
doc.add_text(self.schema.get_field(FIELD_REGION).unwrap(), region);
}
for country in poi.country {
doc.add_text(self.schema.get_field(FIELD_COUNTRY).unwrap(), country);
}
doc.add_json_object(
self.schema.get_field(FIELD_TAGS).unwrap(),
poi.tags
.iter()
.map(|(k, v)| (k.to_string(), serde_json::Value::String(v.to_string())))
.collect::<serde_json::Map<String, Value>>(),
);
doc.add_u64(self.schema.get_field(FIELD_S2CELL).unwrap(), poi.s2cell);
self.tantivy_writer.add_document(doc)?;

Expand Down
1 change: 0 additions & 1 deletion airmail/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
pub mod index;
pub mod parser;
pub mod poi;
1 change: 0 additions & 1 deletion airmail/src/parser.rs

This file was deleted.

3 changes: 2 additions & 1 deletion airmail/src/poi.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use std::error::Error;

use airmail_common::categories::PoiCategory;
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AirmailPoi {
pub name: Vec<String>,
pub source: String,
Expand Down
4 changes: 2 additions & 2 deletions airmail_index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ reqwest = { version = "0.11", features = ["json"] }
tokio = { version = "1", features = ["full"] }
crossbeam = { version = "0.8.4", features = ["crossbeam-channel"] }
rayon = "1.8.1"
serde_json = "1.0.113"
serde_json = "1"
s2 = "0.0.12"
geojson = "0.24.1"
serde = { version = "1.0.196", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
airmail_common = { path = "../airmail_common" }
lazy_static = "1.4.0"
bollard = "0.15.0"
Expand Down
2 changes: 1 addition & 1 deletion airmail_index/src/bin/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!();
if let Some(results) = results {
for result in &results {
println!(" - {}", result);
println!(" - {:?}", result);
}
println!("{} results found in {:?}", results.len(), start.elapsed());
} else {
Expand Down
3 changes: 3 additions & 0 deletions airmail_index/src/openstreetmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,13 @@ pub fn parse_osm<CB: Sync + Fn(AirmailPoi) -> Result<(), Box<dyn std::error::Err
},
)?;

println!("Beginning second pass. Expect fewer log messages. Indexing is still progressing.");

let file = File::open(pbf_path)?;
let reader = osmpbf::reader::ElementReader::new(file);
let ways_of_interest = ways_of_interest.into_iter().collect::<HashMap<_, _>>();
reader.for_each(|obj| {
// FIXME: This could be a node?
if let Element::Way(way) = obj {
if ways_of_interest.contains_key(&way.id()) {
let tags = ways_of_interest.get(&way.id()).unwrap();
Expand Down
2 changes: 1 addition & 1 deletion airmail_index/src/substitutions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ pub(super) fn permute_housenum(housenum: &str) -> Result<Vec<String>, Box<dyn Er
}

pub(super) fn permute_road(road: &str) -> Result<Vec<String>, Box<dyn Error>> {
let mut permutations = Vec::new();
let road = sanitize(road);
let mut permutations = vec![road.clone()];
// This may be a bad way of handling it, I don't know enough about non-ascii whitespace to be sure.
let road_components: Vec<Vec<String>> = road
.split_whitespace()
Expand Down
18 changes: 18 additions & 0 deletions airmail_service/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[package]
name = "airmail_service"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
axum = "0.7.4"
tokio = { version = "1.36.0", features = ["full"] }
airmail = { path = "../airmail" }
airmail_parser = { path = "../airmail_parser" }
env_logger = "0.11.1"
deunicode = "1.4.2"
log = "0.4.20"
clap = { version = "4.4.18", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
58 changes: 58 additions & 0 deletions airmail_service/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use std::{collections::HashMap, sync::Arc};

use airmail::{index::AirmailIndex, poi::AirmailPoi};
use axum::{
extract::{Query, State},
routing::get,
Json, Router,
};
use clap::Parser;
use deunicode::deunicode;
use log::trace;
use serde_json::Value;

#[derive(Debug, Parser)]
struct Args {
#[clap(long, short)]
index_path: String,
}

async fn search(
Query(params): Query<HashMap<String, String>>,
State(index): State<Arc<AirmailIndex>>,
) -> Json<Value> {
let query = params.get("q").unwrap();
trace!("searching for {:?}", query);
let query = deunicode(query.trim()).to_lowercase();
let parsed = airmail_parser::query::Query::parse(&query);

let scenarios = parsed.scenarios();
let results: Option<Vec<AirmailPoi>> = scenarios
.iter()
.take(10)
.filter_map(|scenario| {
let results = index.search(scenario).unwrap();
if results.is_empty() {
None
} else {
Some(results)
}
})
.next();

if let Some(results) = results {
Json(serde_json::to_value(results).unwrap())
} else {
Json(serde_json::Value::Array(vec![]))
}
}

#[tokio::main]
async fn main() {
env_logger::init();
let args = Args::parse();
let index = Arc::new(AirmailIndex::new(&args.index_path).unwrap());
let app = Router::new().route("/search", get(search).with_state(index));
let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap();
axum::serve(listener, app).await.unwrap();
}

0 comments on commit 1df32b5

Please # to comment.