Skip to content

Commit ec7628a

Browse files
committed
add db-gen program to create DBs 3x to 20x as fast (powered by gitoxide)
It produces diffs in parallel, and despite wasting quite a bit of CPU due to less-than-stellar object access performance for diffs, it still manages to create a linux kernel database in ~21 minutes (M1 Pro).
1 parent a58f6c8 commit ec7628a

File tree

7 files changed

+607
-0
lines changed

7 files changed

+607
-0
lines changed

db-gen/.gitignore

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Created by https://www.toptal.com/developers/gitignore/api/rust
2+
# Edit at https://www.toptal.com/developers/gitignore?templates=rust
3+
4+
### Rust ###
5+
# Generated by Cargo
6+
# will have compiled files and executables
7+
debug/
8+
target/
9+
10+
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
11+
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
12+
Cargo.lock
13+
14+
# These are backup files generated by rustfmt
15+
**/*.rs.bk
16+
17+
# MSVC Windows builds of rustc generate these, which store debugging information
18+
*.pdb
19+
20+
# End of https://www.toptal.com/developers/gitignore/api/rust

db-gen/Cargo.toml

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[package]
2+
name = "db-gen"
3+
version = "0.1.0"
4+
edition = "2021"
5+
authors = ["Sebastian Thiel"]
6+
publish = false
7+
8+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
9+
10+
[dependencies]
11+
anyhow = "1.0.68"
12+
gix = { version = "0.37.2", features = ["max-performance"] }
13+
prodash = { version = "23.0.0", features = ["render-line-autoconfigure", "render-line", "render-line-crossterm", "signal-hook"] }
14+
crossbeam-channel = "0.5.6"
15+
clap = { version = "4.1.6", features = ["derive"] }
16+
rusqlite = "0.28.0"

db-gen/README.md

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
Produce a repository database fast with `gitoxide`.
2+
3+
### How to run
4+
5+
Assuming an existing `Rust` installation (which can be done [with rustup](https://rustup.rs)),
6+
run the following:
7+
8+
```sh
9+
cargo run --release -- /path/to/repository
10+
```
11+
12+
This will drop the required database files in the current directory.
13+
As the database location matters, one might want to do the following instead:
14+
15+
```sh
16+
cargo build --release
17+
cd ..
18+
./db-gen/target/release/db-gen /path/to/repository
19+
```
20+
21+
That way the database files are placed where they would be expected by the visualizer.
22+

db-gen/rustfmt.toml

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
max_width = 120
2+

db-gen/src/db.rs

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/// Adds the lookup indices on `commitFile (fileID)` and `commitAuthor (authorEmail)`.
///
/// Both statements use `if not exists`, so an index that is already present is left as is.
///
/// # Errors
///
/// Returns the error of the first statement that fails; the second statement is not
/// attempted if the first one fails.
pub fn create_indices(con: &rusqlite::Connection) -> rusqlite::Result<()> {
    for statement in [
        "CREATE INDEX if not exists commitFileID ON commitFile (fileID)",
        "CREATE INDEX if not exists commitAuthorEmail ON commitAuthor (authorEmail)",
    ] {
        con.execute_batch(statement)?;
    }
    Ok(())
}
5+
6+
/// Opens the SQLite database file `{name}.db` and makes sure all five tables exist.
///
/// The tables are created in this order: `commits`, `files`, `commitFile`, `author`,
/// `commitAuthor`. Every statement uses `if not exists`, so tables that are already
/// present are not modified.
///
/// # Errors
///
/// Returns the first error raised while opening the connection or while executing one of
/// the `CREATE TABLE` statements. Statements that ran before the failing one are not
/// rolled back here.
pub fn create(name: &str) -> rusqlite::Result<rusqlite::Connection> {
    // One entry per table, executed top to bottom.
    const TABLE_DEFINITIONS: [&str; 5] = [
        r#"
        CREATE TABLE if not exists commits(
            hash character(40) NOT NULL PRIMARY KEY,
            authorDate text NOT NULL,
            committerName text NOT NULL,
            committerEmail text NOT NULL,
            committerDate text NOT NULL
        )
        "#,
        r#"
        CREATE TABLE if not exists files(
            fileID integer NOT NULL PRIMARY KEY,
            filePath text UNIQUE
        )
        "#,
        // NOTE(review): `fileID` is declared `text` here but `integer` in `files` —
        // confirm whether that mismatch is intentional before changing it.
        r#"
        CREATE TABLE if not exists commitFile(
            hash character(40),
            fileID text,
            linesAdded integer,
            linesRemoved integer,
            FOREIGN KEY (hash) REFERENCES commits (hash),
            FOREIGN KEY (fileID) REFERENCES files (fileID),
            PRIMARY KEY (hash, fileID)
        )
        "#,
        r#"
        CREATE TABLE if not exists author(
            authorEmail text NOT NULL PRIMARY KEY,
            authorName text NOT NULL
        )
        "#,
        r#"
        CREATE TABLE if not exists commitAuthor(
            hash character(40),
            authorEmail text,
            FOREIGN KEY (hash) REFERENCES commits (hash),
            FOREIGN KEY (authorEmail) REFERENCES author (authorEmail),
            PRIMARY KEY (hash, authorEmail)
        )
        "#,
    ];

    let db_path = format!("{name}.db");
    let con = rusqlite::Connection::open(db_path)?;
    for ddl in TABLE_DEFINITIONS {
        con.execute_batch(ddl)?;
    }
    Ok(con)
}

0 commit comments

Comments
 (0)