third_party: bump everything
parent
08f6dc3ac5
commit
2938a31682
|
@ -14,7 +14,7 @@ licenses([
|
||||||
# Aliased targets
|
# Aliased targets
|
||||||
alias(
|
alias(
|
||||||
name = "cgmath",
|
name = "cgmath",
|
||||||
actual = "//third_party/cargo/vendor/cgmath-0.17.0:cgmath",
|
actual = "//third_party/cargo/vendor/cgmath-0.18.0:cgmath",
|
||||||
tags = [
|
tags = [
|
||||||
"cargo-raze",
|
"cargo-raze",
|
||||||
"manual",
|
"manual",
|
||||||
|
@ -41,7 +41,7 @@ alias(
|
||||||
|
|
||||||
alias(
|
alias(
|
||||||
name = "image",
|
name = "image",
|
||||||
actual = "//third_party/cargo/vendor/image-0.23.4:image",
|
actual = "//third_party/cargo/vendor/image-0.23.12:image",
|
||||||
tags = [
|
tags = [
|
||||||
"cargo-raze",
|
"cargo-raze",
|
||||||
"manual",
|
"manual",
|
||||||
|
@ -50,16 +50,7 @@ alias(
|
||||||
|
|
||||||
alias(
|
alias(
|
||||||
name = "log",
|
name = "log",
|
||||||
actual = "//third_party/cargo/vendor/log-0.4.8:log",
|
actual = "//third_party/cargo/vendor/log-0.4.11:log",
|
||||||
tags = [
|
|
||||||
"cargo-raze",
|
|
||||||
"manual",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
alias(
|
|
||||||
name = "openvr",
|
|
||||||
actual = "//third_party/cargo/vendor/openvr-0.6.0:openvr",
|
|
||||||
tags = [
|
tags = [
|
||||||
"cargo-raze",
|
"cargo-raze",
|
||||||
"manual",
|
"manual",
|
||||||
|
@ -68,7 +59,7 @@ alias(
|
||||||
|
|
||||||
alias(
|
alias(
|
||||||
name = "vulkano",
|
name = "vulkano",
|
||||||
actual = "//third_party/cargo/vendor/vulkano-0.18.0:vulkano",
|
actual = "//third_party/cargo/vendor/vulkano-0.20.0:vulkano",
|
||||||
tags = [
|
tags = [
|
||||||
"cargo-raze",
|
"cargo-raze",
|
||||||
"manual",
|
"manual",
|
||||||
|
@ -77,7 +68,7 @@ alias(
|
||||||
|
|
||||||
alias(
|
alias(
|
||||||
name = "vulkano_win",
|
name = "vulkano_win",
|
||||||
actual = "//third_party/cargo/vendor/vulkano-win-0.18.0:vulkano_win",
|
actual = "//third_party/cargo/vendor/vulkano-win-0.20.0:vulkano_win",
|
||||||
tags = [
|
tags = [
|
||||||
"cargo-raze",
|
"cargo-raze",
|
||||||
"manual",
|
"manual",
|
||||||
|
@ -86,7 +77,7 @@ alias(
|
||||||
|
|
||||||
alias(
|
alias(
|
||||||
name = "winit",
|
name = "winit",
|
||||||
actual = "//third_party/cargo/vendor/winit-0.22.2:winit",
|
actual = "//third_party/cargo/vendor/winit-0.24.0:winit",
|
||||||
tags = [
|
tags = [
|
||||||
"cargo-raze",
|
"cargo-raze",
|
||||||
"manual",
|
"manual",
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -10,11 +10,11 @@ path = "fake_lib.rs"
|
||||||
env_logger = "0.6.1"
|
env_logger = "0.6.1"
|
||||||
image = "0.23.1"
|
image = "0.23.1"
|
||||||
log = "0.4.6"
|
log = "0.4.6"
|
||||||
vulkano = "0.18.0"
|
vulkano = "0.20.0"
|
||||||
vulkano-win = "0.18.0"
|
vulkano-win = "0.20.0"
|
||||||
winit = "0.22.0"
|
winit = "0.24.0"
|
||||||
cgmath = "0.17.0"
|
cgmath = "0.18.0"
|
||||||
openvr = "0.6.0"
|
#openvr = "0.6.0"
|
||||||
flatbuffers = "0.6.1"
|
flatbuffers = "0.6.1"
|
||||||
|
|
||||||
[workspace.metadata.raze]
|
[workspace.metadata.raze]
|
||||||
|
@ -43,11 +43,23 @@ additional_flags = [
|
||||||
"--cfg=atomic_cas"
|
"--cfg=atomic_cas"
|
||||||
]
|
]
|
||||||
|
|
||||||
[workspace.metadata.raze.crates.libloading.'0.6.2']
|
[workspace.metadata.raze.crates.libloading.'0.6.6']
|
||||||
additional_deps = ['//third_party/cargo/patches:libloading_global_static']
|
additional_deps = ['//third_party/cargo/patches:libloading_global_static']
|
||||||
|
|
||||||
[workspace.metadata.raze.crates.cgmath.'0.17.0']
|
[workspace.metadata.raze.crates.cgmath.'0.18.0']
|
||||||
gen_buildrs = true
|
gen_buildrs = true
|
||||||
|
|
||||||
[workspace.metadata.raze.crates.image.'0.23.4']
|
[workspace.metadata.raze.crates.image.'0.23.12']
|
||||||
|
gen_buildrs = true
|
||||||
|
|
||||||
|
[workspace.metadata.raze.crates.num-rational.'0.3.2']
|
||||||
|
gen_buildrs = true
|
||||||
|
|
||||||
|
[workspace.metadata.raze.crates.libc.'0.2.82']
|
||||||
|
gen_buildrs = true
|
||||||
|
|
||||||
|
[workspace.metadata.raze.crates.wayland-client.'0.28.3']
|
||||||
|
gen_buildrs = true
|
||||||
|
|
||||||
|
[workspace.metadata.raze.crates.wayland-protocols.'0.28.3']
|
||||||
gen_buildrs = true
|
gen_buildrs = true
|
||||||
|
|
|
@ -2,5 +2,5 @@ cc_library(
|
||||||
name = "libloading_global_static",
|
name = "libloading_global_static",
|
||||||
srcs = ["libloading/global_static.c"],
|
srcs = ["libloading/global_static.c"],
|
||||||
copts = ["-fPIC"],
|
copts = ["-fPIC"],
|
||||||
visibility = ["//third_party/cargo/vendor/libloading-0.6.2:__pkg__"],
|
visibility = ["//third_party/cargo/vendor/libloading-0.6.6:__pkg__"],
|
||||||
)
|
)
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
{"files":{"CHANGELOG.md":"12997ba678cbef530fcd89e4e9b071bad7a63f2c21127a9895d12a71c4762f72","Cargo.toml":"74cdeb71983a9e460620aeba81a0ea6c98d6b656558d6b17bfff3e64c172a071","README.md":"59162e54bce67b4181f793866a73e4906b3cb4b45f3487f045aca2ce06611a80","src/geometry.rs":"8d970a944af7199ac6a42ace5d1ca661f7764d10a2af0eb09f7b356547f80cf8","src/lib.rs":"32f718b6be690d4d22fa60bf2d2f3b73f645e293a12f0e7c969c7ff2ac2f0a54","src/nostd_float.rs":"425e4f7a3c20213d561a376a09cb75a37ba3989b42e1700a3b15f642ccb99918","src/raster.rs":"49171451f8447d5200da96c5698cb4cd9e4d1556bb8d4cc76e998d48297d4f95","tests/issues.rs":"dff1f0f9992a49a71b3ac4e298033fe9687194a7948bdf29b110daa1ccc99790"},"package":"d9fe5e32de01730eb1f6b7f5b51c17e03e2325bf40a74f754f04f130043affff"}
|
|
@ -0,0 +1,57 @@
|
||||||
|
"""
|
||||||
|
@generated
|
||||||
|
cargo-raze crate build file.
|
||||||
|
|
||||||
|
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||||
|
"""
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load(
|
||||||
|
"@io_bazel_rules_rust//rust:rust.bzl",
|
||||||
|
"rust_binary",
|
||||||
|
"rust_library",
|
||||||
|
"rust_test",
|
||||||
|
)
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load("@bazel_skylib//lib:selects.bzl", "selects")
|
||||||
|
|
||||||
|
package(default_visibility = [
|
||||||
|
# Public for visibility by "@raze__crate__version//" targets.
|
||||||
|
#
|
||||||
|
# Prefer access through "//third_party/cargo", which limits external
|
||||||
|
# visibility to explicit Cargo.toml dependencies.
|
||||||
|
"//visibility:public",
|
||||||
|
])
|
||||||
|
|
||||||
|
licenses([
|
||||||
|
"notice", # Apache-2.0 from expression "Apache-2.0"
|
||||||
|
])
|
||||||
|
|
||||||
|
# Generated Targets
|
||||||
|
|
||||||
|
rust_library(
|
||||||
|
name = "ab_glyph_rasterizer",
|
||||||
|
srcs = glob(["**/*.rs"]),
|
||||||
|
crate_features = [
|
||||||
|
"default",
|
||||||
|
"std",
|
||||||
|
],
|
||||||
|
crate_root = "src/lib.rs",
|
||||||
|
crate_type = "lib",
|
||||||
|
data = [],
|
||||||
|
edition = "2018",
|
||||||
|
rustc_flags = [
|
||||||
|
"--cap-lints=allow",
|
||||||
|
],
|
||||||
|
tags = [
|
||||||
|
"cargo-raze",
|
||||||
|
"manual",
|
||||||
|
],
|
||||||
|
version = "0.1.4",
|
||||||
|
# buildifier: leave-alone
|
||||||
|
deps = [
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Unsupported target "issues" with type "test" omitted
|
|
@ -0,0 +1,16 @@
|
||||||
|
# 0.1.4
|
||||||
|
* Add `Rasterizer::reset`, `Rasterizer::clear` methods to allow allocation reuse.
|
||||||
|
|
||||||
|
# 0.1.3
|
||||||
|
* Fix index oob panic scenario.
|
||||||
|
|
||||||
|
# 0.1.2
|
||||||
|
* For `Point` implement `Sub`, `Add`, `SubAssign`, `AddAssign`, `PartialEq`, `PartialOrd`, `From<(x, y)>`,
|
||||||
|
`From<[x, y]>` for easier use downstream.
|
||||||
|
* Switch `Point` `Debug` implementation to output `point(1.2, 3.4)` smaller representation referring to the `point` fn.
|
||||||
|
|
||||||
|
# 0.1.1
|
||||||
|
* Add explicit compile error when building no_std without the "libm" feature.
|
||||||
|
|
||||||
|
# 0.1
|
||||||
|
* Implement zero dependency coverage rasterization for lines, quadratic & cubic beziers.
|
|
@ -0,0 +1,31 @@
|
||||||
|
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||||
|
#
|
||||||
|
# When uploading crates to the registry Cargo will automatically
|
||||||
|
# "normalize" Cargo.toml files for maximal compatibility
|
||||||
|
# with all versions of Cargo and also rewrite `path` dependencies
|
||||||
|
# to registry (e.g., crates.io) dependencies
|
||||||
|
#
|
||||||
|
# If you believe there's an error in this file please file an
|
||||||
|
# issue against the rust-lang/cargo repository. If you're
|
||||||
|
# editing this file be aware that the upstream Cargo.toml
|
||||||
|
# will likely look very different (and much more reasonable)
|
||||||
|
|
||||||
|
[package]
|
||||||
|
edition = "2018"
|
||||||
|
name = "ab_glyph_rasterizer"
|
||||||
|
version = "0.1.4"
|
||||||
|
authors = ["Alex Butler <alexheretic@gmail.com>"]
|
||||||
|
description = "Coverage rasterization for lines, quadratic & cubic beziers"
|
||||||
|
readme = "README.md"
|
||||||
|
keywords = ["text", "ttf", "otf", "font"]
|
||||||
|
license = "Apache-2.0"
|
||||||
|
repository = "https://github.com/alexheretic/ab-glyph"
|
||||||
|
[dependencies.libm]
|
||||||
|
version = "0.2.1"
|
||||||
|
optional = true
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["std"]
|
||||||
|
std = []
|
|
@ -0,0 +1,51 @@
|
||||||
|
ab_glyph_rasterizer
|
||||||
|
[![crates.io](https://img.shields.io/crates/v/ab_glyph_rasterizer.svg)](https://crates.io/crates/ab_glyph_rasterizer)
|
||||||
|
[![Documentation](https://docs.rs/ab_glyph_rasterizer/badge.svg)](https://docs.rs/ab_glyph_rasterizer)
|
||||||
|
===================
|
||||||
|
Coverage rasterization for lines, quadratic & cubic beziers.
|
||||||
|
Useful for drawing .otf font glyphs.
|
||||||
|
|
||||||
|
Inspired by [font-rs](https://github.com/raphlinus/font-rs) &
|
||||||
|
[stb_truetype](https://github.com/nothings/stb/blob/master/stb_truetype.h).
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
```rust
|
||||||
|
let mut rasterizer = ab_glyph_rasterizer::Rasterizer::new(106, 183);
|
||||||
|
|
||||||
|
// draw a 300px 'ę' character
|
||||||
|
rasterizer.draw_cubic(point(103.0, 163.5), point(86.25, 169.25), point(77.0, 165.0), point(82.25, 151.5));
|
||||||
|
rasterizer.draw_cubic(point(82.25, 151.5), point(86.75, 139.75), point(94.0, 130.75), point(102.0, 122.0));
|
||||||
|
rasterizer.draw_line(point(102.0, 122.0), point(100.25, 111.25));
|
||||||
|
rasterizer.draw_cubic(point(100.25, 111.25), point(89.0, 112.75), point(72.75, 114.25), point(58.5, 114.25));
|
||||||
|
rasterizer.draw_cubic(point(58.5, 114.25), point(30.75, 114.25), point(18.5, 105.25), point(16.75, 72.25));
|
||||||
|
rasterizer.draw_line(point(16.75, 72.25), point(77.0, 72.25));
|
||||||
|
rasterizer.draw_cubic(point(77.0, 72.25), point(97.0, 72.25), point(105.25, 60.25), point(104.75, 38.5));
|
||||||
|
rasterizer.draw_cubic(point(104.75, 38.5), point(104.5, 13.5), point(89.0, 0.75), point(54.25, 0.75));
|
||||||
|
rasterizer.draw_cubic(point(54.25, 0.75), point(16.0, 0.75), point(0.0, 16.75), point(0.0, 64.0));
|
||||||
|
rasterizer.draw_cubic(point(0.0, 64.0), point(0.0, 110.5), point(16.0, 128.0), point(56.5, 128.0));
|
||||||
|
rasterizer.draw_cubic(point(56.5, 128.0), point(66.0, 128.0), point(79.5, 127.0), point(90.0, 125.0));
|
||||||
|
rasterizer.draw_cubic(point(90.0, 125.0), point(78.75, 135.25), point(73.25, 144.5), point(70.75, 152.0));
|
||||||
|
rasterizer.draw_cubic(point(70.75, 152.0), point(64.5, 169.0), point(75.5, 183.0), point(105.0, 170.5));
|
||||||
|
rasterizer.draw_line(point(105.0, 170.5), point(103.0, 163.5));
|
||||||
|
rasterizer.draw_cubic(point(55.0, 14.5), point(78.5, 14.5), point(88.5, 21.75), point(88.75, 38.75));
|
||||||
|
rasterizer.draw_cubic(point(88.75, 38.75), point(89.0, 50.75), point(85.75, 59.75), point(73.5, 59.75));
|
||||||
|
rasterizer.draw_line(point(73.5, 59.75), point(16.5, 59.75));
|
||||||
|
rasterizer.draw_cubic(point(16.5, 59.75), point(17.25, 25.5), point(27.0, 14.5), point(55.0, 14.5));
|
||||||
|
rasterizer.draw_line(point(55.0, 14.5), point(55.0, 14.5));
|
||||||
|
|
||||||
|
// iterate over the resultant pixel alphas, e.g. save pixel to a buffer
|
||||||
|
rasterizer.for_each_pixel(|index, alpha| {
|
||||||
|
// ...
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
Rendering the resultant pixel alphas as 8-bit grey produces:
|
||||||
|
|
||||||
|
![reference_otf_tailed_e](https://user-images.githubusercontent.com/2331607/78987793-ee95f480-7b26-11ea-91fb-e9f359d766f8.png)
|
||||||
|
|
||||||
|
## no_std
|
||||||
|
no_std environments are supported using `alloc` & [`libm`](https://github.com/rust-lang/libm).
|
||||||
|
```toml
|
||||||
|
ab_glyph_rasterizer = { default-features = false, features = ["libm"] }
|
||||||
|
```
|
|
@ -0,0 +1,148 @@
|
||||||
|
#[cfg(all(feature = "libm", not(feature = "std")))]
|
||||||
|
use crate::nostd_float::FloatExt;
|
||||||
|
|
||||||
|
/// An (x, y) coordinate.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
/// ```
|
||||||
|
/// use ab_glyph_rasterizer::{point, Point};
|
||||||
|
/// let p: Point = point(0.1, 23.2);
|
||||||
|
/// ```
|
||||||
|
#[derive(Clone, Copy, Default, PartialEq, PartialOrd)]
|
||||||
|
pub struct Point {
|
||||||
|
pub x: f32,
|
||||||
|
pub y: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::fmt::Debug for Point {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
write!(f, "point({:?}, {:?})", self.x, self.y)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Point {
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn distance_to(self, other: Point) -> f32 {
|
||||||
|
let d = other - self;
|
||||||
|
(d.x * d.x + d.y * d.y).sqrt()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// [`Point`](struct.Point.html) constructor.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::{point, Point};
|
||||||
|
/// let p = point(0.1, 23.2);
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub fn point(x: f32, y: f32) -> Point {
|
||||||
|
Point { x, y }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Linear interpolation between points.
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn lerp(t: f32, p0: Point, p1: Point) -> Point {
|
||||||
|
point(p0.x + t * (p1.x - p0.x), p0.y + t * (p1.y - p0.y))
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::Sub for Point {
|
||||||
|
type Output = Point;
|
||||||
|
/// Subtract rhs.x from x, rhs.y from y.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// let p1 = point(1.0, 2.0) - point(2.0, 1.5);
|
||||||
|
///
|
||||||
|
/// assert!((p1.x - -1.0).abs() <= core::f32::EPSILON);
|
||||||
|
/// assert!((p1.y - 0.5).abs() <= core::f32::EPSILON);
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
fn sub(self, rhs: Point) -> Point {
|
||||||
|
point(self.x - rhs.x, self.y - rhs.y)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::Add for Point {
|
||||||
|
type Output = Point;
|
||||||
|
/// Add rhs.x to x, rhs.y to y.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// let p1 = point(1.0, 2.0) + point(2.0, 1.5);
|
||||||
|
///
|
||||||
|
/// assert!((p1.x - 3.0).abs() <= core::f32::EPSILON);
|
||||||
|
/// assert!((p1.y - 3.5).abs() <= core::f32::EPSILON);
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
fn add(self, rhs: Point) -> Point {
|
||||||
|
point(self.x + rhs.x, self.y + rhs.y)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::AddAssign for Point {
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// let mut p1 = point(1.0, 2.0);
|
||||||
|
/// p1 += point(2.0, 1.5);
|
||||||
|
///
|
||||||
|
/// assert!((p1.x - 3.0).abs() <= core::f32::EPSILON);
|
||||||
|
/// assert!((p1.y - 3.5).abs() <= core::f32::EPSILON);
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
fn add_assign(&mut self, other: Self) {
|
||||||
|
self.x += other.x;
|
||||||
|
self.y += other.y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::SubAssign for Point {
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// let mut p1 = point(1.0, 2.0);
|
||||||
|
/// p1 -= point(2.0, 1.5);
|
||||||
|
///
|
||||||
|
/// assert!((p1.x - -1.0).abs() <= core::f32::EPSILON);
|
||||||
|
/// assert!((p1.y - 0.5).abs() <= core::f32::EPSILON);
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
fn sub_assign(&mut self, other: Self) {
|
||||||
|
self.x -= other.x;
|
||||||
|
self.y -= other.y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<F: Into<f32>> From<(F, F)> for Point {
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// let p: Point = (23_f32, 34.5_f32).into();
|
||||||
|
/// let p2: Point = (5u8, 44u8).into();
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
fn from((x, y): (F, F)) -> Self {
|
||||||
|
point(x.into(), y.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<F: Into<f32>> From<[F; 2]> for Point {
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// let p: Point = [23_f32, 34.5].into();
|
||||||
|
/// let p2: Point = [5u8, 44].into();
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
fn from([x, y]: [F; 2]) -> Self {
|
||||||
|
point(x.into(), y.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn distance_to() {
|
||||||
|
let distance = point(0.0, 0.0).distance_to(point(3.0, 4.0));
|
||||||
|
assert!((distance - 5.0).abs() <= core::f32::EPSILON);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
//! Coverage rasterization for lines, quadratic & cubic beziers.
|
||||||
|
//! Useful for drawing .otf font glyphs.
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! use ab_glyph_rasterizer::Rasterizer;
|
||||||
|
//! # let (width, height) = (1, 1);
|
||||||
|
//! let mut rasterizer = Rasterizer::new(width, height);
|
||||||
|
//!
|
||||||
|
//! // draw outlines
|
||||||
|
//! # let [l0, l1, q0, q1, q2, c0, c1, c2, c3] = [ab_glyph_rasterizer::point(0.0, 0.0); 9];
|
||||||
|
//! rasterizer.draw_line(l0, l1);
|
||||||
|
//! rasterizer.draw_quad(q0, q1, q2);
|
||||||
|
//! rasterizer.draw_cubic(c0, c1, c2, c3);
|
||||||
|
//!
|
||||||
|
//! // iterate over the resultant pixel alphas, e.g. save pixel to a buffer
|
||||||
|
//! rasterizer.for_each_pixel(|index, alpha| {
|
||||||
|
//! // ...
|
||||||
|
//! });
|
||||||
|
//! ```
|
||||||
|
|
||||||
|
#![cfg_attr(not(feature = "std"), no_std)]
|
||||||
|
#[cfg(not(feature = "std"))]
|
||||||
|
#[macro_use]
|
||||||
|
extern crate alloc;
|
||||||
|
|
||||||
|
#[cfg(all(feature = "libm", not(feature = "std")))]
|
||||||
|
mod nostd_float;
|
||||||
|
|
||||||
|
#[cfg(not(any(feature = "libm", feature = "std")))]
|
||||||
|
compile_error!("You need to activate either the `std` or `libm` feature.");
|
||||||
|
|
||||||
|
mod geometry;
|
||||||
|
mod raster;
|
||||||
|
|
||||||
|
pub use geometry::{point, Point};
|
||||||
|
pub use raster::Rasterizer;
|
|
@ -0,0 +1,31 @@
|
||||||
|
/// Basic required float operations.
|
||||||
|
pub(crate) trait FloatExt {
|
||||||
|
fn floor(self) -> Self;
|
||||||
|
fn ceil(self) -> Self;
|
||||||
|
fn sqrt(self) -> Self;
|
||||||
|
fn round(self) -> Self;
|
||||||
|
fn abs(self) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FloatExt for f32 {
|
||||||
|
#[inline]
|
||||||
|
fn floor(self) -> Self {
|
||||||
|
libm::floorf(self)
|
||||||
|
}
|
||||||
|
#[inline]
|
||||||
|
fn ceil(self) -> Self {
|
||||||
|
libm::ceilf(self)
|
||||||
|
}
|
||||||
|
#[inline]
|
||||||
|
fn sqrt(self) -> Self {
|
||||||
|
libm::sqrtf(self)
|
||||||
|
}
|
||||||
|
#[inline]
|
||||||
|
fn round(self) -> Self {
|
||||||
|
libm::roundf(self)
|
||||||
|
}
|
||||||
|
#[inline]
|
||||||
|
fn abs(self) -> Self {
|
||||||
|
libm::fabsf(self)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,286 @@
|
||||||
|
// Forked/repurposed from `font-rs` code: https://github.com/raphlinus/font-rs
|
||||||
|
// Copyright 2015 Google Inc. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Modifications copyright (C) 2020 Alex Butler
|
||||||
|
//
|
||||||
|
// Cubic bezier drawing adapted from stb_truetype: https://github.com/nothings/stb
|
||||||
|
#[cfg(all(feature = "libm", not(feature = "std")))]
|
||||||
|
use crate::nostd_float::FloatExt;
|
||||||
|
#[cfg(not(feature = "std"))]
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
|
use crate::geometry::{lerp, Point};
|
||||||
|
|
||||||
|
/// Coverage rasterizer for lines, quadratic & cubic beziers.
|
||||||
|
pub struct Rasterizer {
|
||||||
|
width: usize,
|
||||||
|
height: usize,
|
||||||
|
a: Vec<f32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Rasterizer {
|
||||||
|
/// Allocates a new rasterizer that can draw onto a `width` x `height` alpha grid.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use ab_glyph_rasterizer::Rasterizer;
|
||||||
|
/// let mut rasterizer = Rasterizer::new(14, 38);
|
||||||
|
/// ```
|
||||||
|
pub fn new(width: usize, height: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
a: vec![0.0; width * height + 4],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resets the rasterizer to an empty `width` x `height` alpha grid. This method behaves as if
|
||||||
|
/// the Rasterizer were re-created, with the advantage of not allocating if the total number of
|
||||||
|
/// pixels of the grid does not increase.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::Rasterizer;
|
||||||
|
/// # let mut rasterizer = Rasterizer::new(14, 38);
|
||||||
|
/// rasterizer.reset(12, 24);
|
||||||
|
/// assert_eq!(rasterizer.dimensions(), (12, 24));
|
||||||
|
/// ```
|
||||||
|
pub fn reset(&mut self, width: usize, height: usize) {
|
||||||
|
self.width = width;
|
||||||
|
self.height = height;
|
||||||
|
self.a.truncate(0);
|
||||||
|
self.a.resize(width * height + 4, 0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clears the rasterizer. This method behaves as if the Rasterizer were re-created with the same
|
||||||
|
/// dimensions, but does not perform an allocation.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::Rasterizer;
|
||||||
|
/// # let mut rasterizer = Rasterizer::new(14, 38);
|
||||||
|
/// rasterizer.clear();
|
||||||
|
/// ```
|
||||||
|
pub fn clear(&mut self) {
|
||||||
|
for px in &mut self.a {
|
||||||
|
*px = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the dimensions the rasterizer was built to draw to.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// let rasterizer = Rasterizer::new(9, 8);
|
||||||
|
/// assert_eq!((9, 8), rasterizer.dimensions());
|
||||||
|
/// ```
|
||||||
|
pub fn dimensions(&self) -> (usize, usize) {
|
||||||
|
(self.width, self.height)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds a straight line from `p0` to `p1` to the outline.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// # let mut rasterizer = Rasterizer::new(9, 8);
|
||||||
|
/// rasterizer.draw_line(point(0.0, 0.48), point(1.22, 0.48));
|
||||||
|
/// ```
|
||||||
|
pub fn draw_line(&mut self, p0: Point, p1: Point) {
|
||||||
|
if (p0.y - p1.y).abs() <= core::f32::EPSILON {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let (dir, p0, p1) = if p0.y < p1.y {
|
||||||
|
(1.0, p0, p1)
|
||||||
|
} else {
|
||||||
|
(-1.0, p1, p0)
|
||||||
|
};
|
||||||
|
let dxdy = (p1.x - p0.x) / (p1.y - p0.y);
|
||||||
|
let mut x = p0.x;
|
||||||
|
let y0 = p0.y as usize; // note: implicit max of 0 because usize (TODO: really true?)
|
||||||
|
if p0.y < 0.0 {
|
||||||
|
x -= p0.y * dxdy;
|
||||||
|
}
|
||||||
|
for y in y0..self.height.min(p1.y.ceil() as usize) {
|
||||||
|
let linestart = y * self.width;
|
||||||
|
let dy = ((y + 1) as f32).min(p1.y) - (y as f32).max(p0.y);
|
||||||
|
let xnext = x + dxdy * dy;
|
||||||
|
let d = dy * dir;
|
||||||
|
let (x0, x1) = if x < xnext { (x, xnext) } else { (xnext, x) };
|
||||||
|
let x0floor = x0.floor();
|
||||||
|
let x0i = x0floor as i32;
|
||||||
|
let x1ceil = x1.ceil();
|
||||||
|
let x1i = x1ceil as i32;
|
||||||
|
if x1i <= x0i + 1 {
|
||||||
|
let xmf = 0.5 * (x + xnext) - x0floor;
|
||||||
|
let linestart_x0i = linestart as isize + x0i as isize;
|
||||||
|
if linestart_x0i < 0 {
|
||||||
|
continue; // oob index
|
||||||
|
}
|
||||||
|
self.a[linestart_x0i as usize] += d - d * xmf;
|
||||||
|
self.a[linestart_x0i as usize + 1] += d * xmf;
|
||||||
|
} else {
|
||||||
|
let s = (x1 - x0).recip();
|
||||||
|
let x0f = x0 - x0floor;
|
||||||
|
let a0 = 0.5 * s * (1.0 - x0f) * (1.0 - x0f);
|
||||||
|
let x1f = x1 - x1ceil + 1.0;
|
||||||
|
let am = 0.5 * s * x1f * x1f;
|
||||||
|
let linestart_x0i = linestart as isize + x0i as isize;
|
||||||
|
if linestart_x0i < 0 {
|
||||||
|
continue; // oob index
|
||||||
|
}
|
||||||
|
self.a[linestart_x0i as usize] += d * a0;
|
||||||
|
if x1i == x0i + 2 {
|
||||||
|
self.a[linestart_x0i as usize + 1] += d * (1.0 - a0 - am);
|
||||||
|
} else {
|
||||||
|
let a1 = s * (1.5 - x0f);
|
||||||
|
self.a[linestart_x0i as usize + 1] += d * (a1 - a0);
|
||||||
|
for xi in x0i + 2..x1i - 1 {
|
||||||
|
self.a[linestart + xi as usize] += d * s;
|
||||||
|
}
|
||||||
|
let a2 = a1 + (x1i - x0i - 3) as f32 * s;
|
||||||
|
self.a[linestart + (x1i - 1) as usize] += d * (1.0 - a2 - am);
|
||||||
|
}
|
||||||
|
self.a[linestart + x1i as usize] += d * am;
|
||||||
|
}
|
||||||
|
x = xnext;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds a quadratic Bézier curve from `p0` to `p2` to the outline using `p1` as the control.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// # let mut rasterizer = Rasterizer::new(14, 38);
|
||||||
|
/// rasterizer.draw_quad(point(6.2, 34.5), point(7.2, 34.5), point(9.2, 34.0));
|
||||||
|
/// ```
|
||||||
|
pub fn draw_quad(&mut self, p0: Point, p1: Point, p2: Point) {
|
||||||
|
let devx = p0.x - 2.0 * p1.x + p2.x;
|
||||||
|
let devy = p0.y - 2.0 * p1.y + p2.y;
|
||||||
|
let devsq = devx * devx + devy * devy;
|
||||||
|
if devsq < 0.333 {
|
||||||
|
self.draw_line(p0, p2);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let tol = 3.0;
|
||||||
|
let n = 1 + (tol * devsq).sqrt().sqrt().floor() as usize;
|
||||||
|
let mut p = p0;
|
||||||
|
let nrecip = (n as f32).recip();
|
||||||
|
let mut t = 0.0;
|
||||||
|
for _i in 0..n - 1 {
|
||||||
|
t += nrecip;
|
||||||
|
let pn = lerp(t, lerp(t, p0, p1), lerp(t, p1, p2));
|
||||||
|
self.draw_line(p, pn);
|
||||||
|
p = pn;
|
||||||
|
}
|
||||||
|
self.draw_line(p, p2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds a cubic Bézier curve from `p0` to `p3` to the outline using `p1` as the control
|
||||||
|
/// at the beginning of the curve and `p2` at the end of the curve.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// # let mut rasterizer = Rasterizer::new(12, 20);
|
||||||
|
/// rasterizer.draw_cubic(
|
||||||
|
/// point(10.3, 16.4),
|
||||||
|
/// point(8.6, 16.9),
|
||||||
|
/// point(7.7, 16.5),
|
||||||
|
/// point(8.2, 15.2),
|
||||||
|
/// );
|
||||||
|
/// ```
|
||||||
|
pub fn draw_cubic(&mut self, p0: Point, p1: Point, p2: Point, p3: Point) {
|
||||||
|
self.tesselate_cubic(p0, p1, p2, p3, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// stb_truetype style cubic approximation by lines.
|
||||||
|
fn tesselate_cubic(&mut self, p0: Point, p1: Point, p2: Point, p3: Point, n: u8) {
|
||||||
|
// ...I'm not sure either ¯\_(ツ)_/¯
|
||||||
|
const OBJSPACE_FLATNESS: f32 = 0.35;
|
||||||
|
const OBJSPACE_FLATNESS_SQUARED: f32 = OBJSPACE_FLATNESS * OBJSPACE_FLATNESS;
|
||||||
|
const MAX_RECURSION_DEPTH: u8 = 16;
|
||||||
|
|
||||||
|
let longlen = p0.distance_to(p1) + p1.distance_to(p2) + p2.distance_to(p3);
|
||||||
|
let shortlen = p0.distance_to(p3);
|
||||||
|
let flatness_squared = longlen * longlen - shortlen * shortlen;
|
||||||
|
|
||||||
|
if n < MAX_RECURSION_DEPTH && flatness_squared > OBJSPACE_FLATNESS_SQUARED {
|
||||||
|
let p01 = lerp(0.5, p0, p1);
|
||||||
|
let p12 = lerp(0.5, p1, p2);
|
||||||
|
let p23 = lerp(0.5, p2, p3);
|
||||||
|
|
||||||
|
let pa = lerp(0.5, p01, p12);
|
||||||
|
let pb = lerp(0.5, p12, p23);
|
||||||
|
|
||||||
|
let mp = lerp(0.5, pa, pb);
|
||||||
|
|
||||||
|
self.tesselate_cubic(p0, p01, pa, mp, n + 1);
|
||||||
|
self.tesselate_cubic(mp, pb, p23, p3, n + 1);
|
||||||
|
} else {
|
||||||
|
self.draw_line(p0, p3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run a callback for each pixel index & alpha, with indices in `0..width * height`.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// # let (width, height) = (1, 1);
|
||||||
|
/// # let mut rasterizer = Rasterizer::new(width, height);
|
||||||
|
/// let mut pixels = vec![0u8; width * height];
|
||||||
|
/// rasterizer.for_each_pixel(|index, alpha| {
|
||||||
|
/// pixels[index] = (alpha * 255.0).round() as u8;
|
||||||
|
/// });
|
||||||
|
/// ```
|
||||||
|
pub fn for_each_pixel<O: FnMut(usize, f32)>(&self, mut px_fn: O) {
|
||||||
|
let mut acc = 0.0;
|
||||||
|
self.a[..self.width * self.height]
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.for_each(|(idx, c)| {
|
||||||
|
acc += c;
|
||||||
|
px_fn(idx, acc.abs().min(1.0));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run a callback for each pixel x position, y position & alpha.
|
||||||
|
///
|
||||||
|
/// Convenience wrapper for `for_each_pixel`.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ab_glyph_rasterizer::*;
|
||||||
|
/// # let (width, height) = (1, 1);
|
||||||
|
/// # let mut rasterizer = Rasterizer::new(width, height);
|
||||||
|
/// # struct Img;
|
||||||
|
/// # impl Img { fn set_pixel(&self, x: u32, y: u32, a: u8) {} }
|
||||||
|
/// # let image = Img;
|
||||||
|
/// rasterizer.for_each_pixel_2d(|x, y, alpha| {
|
||||||
|
/// image.set_pixel(x, y, (alpha * 255.0).round() as u8);
|
||||||
|
/// });
|
||||||
|
/// ```
|
||||||
|
pub fn for_each_pixel_2d<O: FnMut(u32, u32, f32)>(&self, mut px_fn: O) {
|
||||||
|
let width32 = self.width as u32;
|
||||||
|
self.for_each_pixel(|idx, alpha| px_fn(idx as u32 % width32, idx as u32 / width32, alpha));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// ```
|
||||||
|
/// let rasterizer = ab_glyph_rasterizer::Rasterizer::new(3, 4);
|
||||||
|
/// assert_eq!(&format!("{:?}", rasterizer), "Rasterizer { width: 3, height: 4 }");
|
||||||
|
/// ```
|
||||||
|
impl core::fmt::Debug for Rasterizer {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
f.debug_struct("Rasterizer")
|
||||||
|
.field("width", &self.width)
|
||||||
|
.field("height", &self.height)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,10 @@
|
||||||
|
use ab_glyph_rasterizer::*;
|
||||||
|
|
||||||
|
/// Index oob panic rasterizing "Gauntl" using Bitter-Regular.otf
|
||||||
|
#[test]
|
||||||
|
fn rusttype_156_index_panic() {
|
||||||
|
let mut r = Rasterizer::new(6, 16);
|
||||||
|
r.draw_line(point(5.54, 14.299999), point(3.7399998, 13.799999));
|
||||||
|
r.draw_line(point(3.7399998, 13.799999), point(3.7399998, 0.0));
|
||||||
|
r.draw_line(point(3.7399998, 0.0), point(0.0, 0.10000038));
|
||||||
|
}
|
|
@ -0,0 +1 @@
|
||||||
|
{"files":{"CHANGELOG.md":"042ed3158af7000c88a6617d775f11456bd30f6c7c8b5b586978faa1e11b1e24","Cargo.toml":"107d13689eecfa82a8b5ae35bf835b9d2775337226630e4bdb35f22d0dd52e18","LICENSE-0BSD":"861399f8c21c042b110517e76dc6b63a2b334276c8cf17412fc3c8908ca8dc17","LICENSE-APACHE":"8ada45cd9f843acf64e4722ae262c622a2b3b3007c7310ef36ac1061a30f6adb","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"fa83fd5ee10b61827de382e496bf66296a526e3d2c3b2aa5ad672aa15e8d2d7f","RELEASE_PROCESS.md":"a86cd10fc70f167f8d00e9e4ce0c6b4ebdfa1865058390dffd1e0ad4d3e68d9d","benches/bench.rs":"c07ce370e3680c602e415f8d1ec4e543ea2163ab22a09b6b82d93e8a30adca82","src/algo.rs":"b664b131f724a809591394a10b9023f40ab5963e32a83fa3163c2668e59c8b66","src/lib.rs":"67f3ca5b6333e22745b178b70f472514162cea2890344724f0f66995fcf19806"},"package":"ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"}
|
|
@ -0,0 +1,55 @@
|
||||||
|
"""
|
||||||
|
@generated
|
||||||
|
cargo-raze crate build file.
|
||||||
|
|
||||||
|
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||||
|
"""
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load(
|
||||||
|
"@io_bazel_rules_rust//rust:rust.bzl",
|
||||||
|
"rust_binary",
|
||||||
|
"rust_library",
|
||||||
|
"rust_test",
|
||||||
|
)
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load("@bazel_skylib//lib:selects.bzl", "selects")
|
||||||
|
|
||||||
|
package(default_visibility = [
|
||||||
|
# Public for visibility by "@raze__crate__version//" targets.
|
||||||
|
#
|
||||||
|
# Prefer access through "//third_party/cargo", which limits external
|
||||||
|
# visibility to explicit Cargo.toml dependencies.
|
||||||
|
"//visibility:public",
|
||||||
|
])
|
||||||
|
|
||||||
|
licenses([
|
||||||
|
"notice", # MIT from expression "0BSD OR (MIT OR Apache-2.0)"
|
||||||
|
])
|
||||||
|
|
||||||
|
# Generated Targets
|
||||||
|
|
||||||
|
# Unsupported target "bench" with type "bench" omitted
|
||||||
|
|
||||||
|
rust_library(
|
||||||
|
name = "adler",
|
||||||
|
srcs = glob(["**/*.rs"]),
|
||||||
|
crate_features = [
|
||||||
|
],
|
||||||
|
crate_root = "src/lib.rs",
|
||||||
|
crate_type = "lib",
|
||||||
|
data = [],
|
||||||
|
edition = "2015",
|
||||||
|
rustc_flags = [
|
||||||
|
"--cap-lints=allow",
|
||||||
|
],
|
||||||
|
tags = [
|
||||||
|
"cargo-raze",
|
||||||
|
"manual",
|
||||||
|
],
|
||||||
|
version = "0.2.3",
|
||||||
|
# buildifier: leave-alone
|
||||||
|
deps = [
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,33 @@
|
||||||
|
# Changelog
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
|
No changes.
|
||||||
|
|
||||||
|
## [0.2.3 - 2020-07-11](https://github.com/jonas-schievink/adler/releases/tag/v0.2.3)
|
||||||
|
|
||||||
|
- Process 4 Bytes at a time, improving performance by up to 50% ([#2]).
|
||||||
|
|
||||||
|
## [0.2.2 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.2)
|
||||||
|
|
||||||
|
- Bump MSRV to 1.31.0.
|
||||||
|
|
||||||
|
## [0.2.1 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.1)
|
||||||
|
|
||||||
|
- Add a few `#[inline]` annotations to small functions.
|
||||||
|
- Fix CI badge.
|
||||||
|
- Allow integration into libstd.
|
||||||
|
|
||||||
|
## [0.2.0 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.0)
|
||||||
|
|
||||||
|
- Support `#![no_std]` when using `default-features = false`.
|
||||||
|
- Improve performance by around 7x.
|
||||||
|
- Support Rust 1.8.0.
|
||||||
|
- Improve API naming.
|
||||||
|
|
||||||
|
## [0.1.0 - 2020-06-26](https://github.com/jonas-schievink/adler/releases/tag/v0.1.0)
|
||||||
|
|
||||||
|
Initial release.
|
||||||
|
|
||||||
|
|
||||||
|
[#2]: https://github.com/jonas-schievink/adler/pull/2
|
|
@ -0,0 +1,69 @@
|
||||||
|
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||||
|
#
|
||||||
|
# When uploading crates to the registry Cargo will automatically
|
||||||
|
# "normalize" Cargo.toml files for maximal compatibility
|
||||||
|
# with all versions of Cargo and also rewrite `path` dependencies
|
||||||
|
# to registry (e.g., crates.io) dependencies
|
||||||
|
#
|
||||||
|
# If you believe there's an error in this file please file an
|
||||||
|
# issue against the rust-lang/cargo repository. If you're
|
||||||
|
# editing this file be aware that the upstream Cargo.toml
|
||||||
|
# will likely look very different (and much more reasonable)
|
||||||
|
|
||||||
|
[package]
|
||||||
|
name = "adler"
|
||||||
|
version = "0.2.3"
|
||||||
|
authors = ["Jonas Schievink <jonasschievink@gmail.com>"]
|
||||||
|
description = "A simple clean-room implementation of the Adler-32 checksum"
|
||||||
|
documentation = "https://docs.rs/adler/"
|
||||||
|
readme = "README.md"
|
||||||
|
keywords = ["checksum", "integrity", "hash", "adler32"]
|
||||||
|
categories = ["algorithms"]
|
||||||
|
license = "0BSD OR MIT OR Apache-2.0"
|
||||||
|
repository = "https://github.com/jonas-schievink/adler.git"
|
||||||
|
[package.metadata.docs.rs]
|
||||||
|
rustdoc-args = ["--cfg docsrs"]
|
||||||
|
|
||||||
|
[package.metadata.release]
|
||||||
|
no-dev-version = true
|
||||||
|
pre-release-commit-message = "Release {{version}}"
|
||||||
|
tag-message = "{{version}}"
|
||||||
|
|
||||||
|
[[package.metadata.release.pre-release-replacements]]
|
||||||
|
file = "CHANGELOG.md"
|
||||||
|
replace = "## Unreleased\n\nNo changes.\n\n## [{{version}} - {{date}}](https://github.com/jonas-schievink/adler/releases/tag/v{{version}})\n"
|
||||||
|
search = "## Unreleased\n"
|
||||||
|
|
||||||
|
[[package.metadata.release.pre-release-replacements]]
|
||||||
|
file = "README.md"
|
||||||
|
replace = "adler = \"{{version}}\""
|
||||||
|
search = "adler = \"[a-z0-9\\\\.-]+\""
|
||||||
|
|
||||||
|
[[package.metadata.release.pre-release-replacements]]
|
||||||
|
file = "src/lib.rs"
|
||||||
|
replace = "https://docs.rs/adler/{{version}}"
|
||||||
|
search = "https://docs.rs/adler/[a-z0-9\\.-]+"
|
||||||
|
|
||||||
|
[[bench]]
|
||||||
|
name = "bench"
|
||||||
|
harness = false
|
||||||
|
[dependencies.compiler_builtins]
|
||||||
|
version = "0.1.2"
|
||||||
|
optional = true
|
||||||
|
|
||||||
|
[dependencies.core]
|
||||||
|
version = "1.0.0"
|
||||||
|
optional = true
|
||||||
|
package = "rustc-std-workspace-core"
|
||||||
|
[dev-dependencies.criterion]
|
||||||
|
version = "0.3.2"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["std"]
|
||||||
|
rustc-dep-of-std = ["core", "compiler_builtins"]
|
||||||
|
std = []
|
||||||
|
[badges.maintenance]
|
||||||
|
status = "actively-developed"
|
||||||
|
|
||||||
|
[badges.travis-ci]
|
||||||
|
repository = "jonas-schievink/adler"
|
|
@ -0,0 +1,12 @@
|
||||||
|
Copyright (C) Jonas Schievink <jonasschievink@gmail.com>
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and/or distribute this software for
|
||||||
|
any purpose with or without fee is hereby granted.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||||
|
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
|
||||||
|
AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||||
|
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
@ -0,0 +1,201 @@
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
|
@ -0,0 +1,38 @@
|
||||||
|
# Adler-32 checksums for Rust
|
||||||
|
|
||||||
|
[![crates.io](https://img.shields.io/crates/v/adler.svg)](https://crates.io/crates/adler)
|
||||||
|
[![docs.rs](https://docs.rs/adler/badge.svg)](https://docs.rs/adler/)
|
||||||
|
![CI](https://github.com/jonas-schievink/adler/workflows/CI/badge.svg)
|
||||||
|
|
||||||
|
This crate provides a simple implementation of the Adler-32 checksum, used in
|
||||||
|
zlib, rsync, and other software.
|
||||||
|
|
||||||
|
Please refer to the [changelog](CHANGELOG.md) to see what changed in the last
|
||||||
|
releases.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- Permissively licensed (0BSD) clean-room implementation.
|
||||||
|
- Zero dependencies.
|
||||||
|
- Decent performance (3-4 GB/s).
|
||||||
|
- Supports `#![no_std]` (with `default-features = false`).
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Add an entry to your `Cargo.toml`:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[dependencies]
|
||||||
|
adler = "0.2.3"
|
||||||
|
```
|
||||||
|
|
||||||
|
Check the [API Documentation](https://docs.rs/adler/) for how to use the
|
||||||
|
crate's functionality.
|
||||||
|
|
||||||
|
## Rust version support
|
||||||
|
|
||||||
|
Currently, this crate supports all Rust versions starting at Rust 1.31.0.
|
||||||
|
|
||||||
|
Bumping the Minimum Supported Rust Version (MSRV) is *not* considered a breaking
|
||||||
|
change, but will not be done without good reasons. The latest 3 stable Rust
|
||||||
|
versions will always be supported no matter what.
|
|
@ -0,0 +1,13 @@
|
||||||
|
# What to do to publish a new release
|
||||||
|
|
||||||
|
1. Ensure all notable changes are in the changelog under "Unreleased".
|
||||||
|
|
||||||
|
2. Execute `cargo release <level>` to bump version(s), tag and publish
|
||||||
|
everything. External subcommand, must be installed with `cargo install
|
||||||
|
cargo-release`.
|
||||||
|
|
||||||
|
`<level>` can be one of `major|minor|patch`. If this is the first release
|
||||||
|
(`0.1.0`), use `minor`, since the version starts out as `0.0.0`.
|
||||||
|
|
||||||
|
3. Go to the GitHub releases, edit the just-pushed tag. Copy the release notes
|
||||||
|
from the changelog.
|
|
@ -0,0 +1,109 @@
|
||||||
|
extern crate adler;
|
||||||
|
extern crate criterion;
|
||||||
|
|
||||||
|
use adler::{adler32_slice, Adler32};
|
||||||
|
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
|
||||||
|
|
||||||
|
fn simple(c: &mut Criterion) {
|
||||||
|
{
|
||||||
|
const SIZE: usize = 100;
|
||||||
|
|
||||||
|
let mut group = c.benchmark_group("simple-100b");
|
||||||
|
group.throughput(Throughput::Bytes(SIZE as u64));
|
||||||
|
group.bench_function("zeroes-100", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
group.bench_function("ones-100", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0xff; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const SIZE: usize = 1024;
|
||||||
|
|
||||||
|
let mut group = c.benchmark_group("simple-1k");
|
||||||
|
group.throughput(Throughput::Bytes(SIZE as u64));
|
||||||
|
|
||||||
|
group.bench_function("zeroes-1k", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
group.bench_function("ones-1k", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0xff; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const SIZE: usize = 1024 * 1024;
|
||||||
|
|
||||||
|
let mut group = c.benchmark_group("simple-1m");
|
||||||
|
group.throughput(Throughput::Bytes(SIZE as u64));
|
||||||
|
group.bench_function("zeroes-1m", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
group.bench_function("ones-1m", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0xff; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn chunked(c: &mut Criterion) {
|
||||||
|
const SIZE: usize = 16 * 1024 * 1024;
|
||||||
|
|
||||||
|
let data = vec![0xAB; SIZE];
|
||||||
|
|
||||||
|
let mut group = c.benchmark_group("chunked-16m");
|
||||||
|
group.throughput(Throughput::Bytes(SIZE as u64));
|
||||||
|
group.bench_function("5552", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
for chunk in data.chunks(5552) {
|
||||||
|
h.write_slice(chunk);
|
||||||
|
}
|
||||||
|
h.checksum()
|
||||||
|
});
|
||||||
|
});
|
||||||
|
group.bench_function("8k", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
for chunk in data.chunks(8 * 1024) {
|
||||||
|
h.write_slice(chunk);
|
||||||
|
}
|
||||||
|
h.checksum()
|
||||||
|
});
|
||||||
|
});
|
||||||
|
group.bench_function("64k", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
for chunk in data.chunks(64 * 1024) {
|
||||||
|
h.write_slice(chunk);
|
||||||
|
}
|
||||||
|
h.checksum()
|
||||||
|
});
|
||||||
|
});
|
||||||
|
group.bench_function("1m", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
for chunk in data.chunks(1024 * 1024) {
|
||||||
|
h.write_slice(chunk);
|
||||||
|
}
|
||||||
|
h.checksum()
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(benches, simple, chunked);
|
||||||
|
criterion_main!(benches);
|
|
@ -0,0 +1,146 @@
|
||||||
|
use crate::Adler32;
|
||||||
|
use std::ops::{AddAssign, MulAssign, RemAssign};
|
||||||
|
|
||||||
|
impl Adler32 {
|
||||||
|
pub(crate) fn compute(&mut self, bytes: &[u8]) {
|
||||||
|
// The basic algorithm is, for every byte:
|
||||||
|
// a = (a + byte) % MOD
|
||||||
|
// b = (b + a) % MOD
|
||||||
|
// where MOD = 65521.
|
||||||
|
//
|
||||||
|
// For efficiency, we can defer the `% MOD` operations as long as neither a nor b overflows:
|
||||||
|
// - Between calls to `write`, we ensure that a and b are always in range 0..MOD.
|
||||||
|
// - We use 32-bit arithmetic in this function.
|
||||||
|
// - Therefore, a and b must not increase by more than 2^32-MOD without performing a `% MOD`
|
||||||
|
// operation.
|
||||||
|
//
|
||||||
|
// According to Wikipedia, b is calculated as follows for non-incremental checksumming:
|
||||||
|
// b = n×D1 + (n−1)×D2 + (n−2)×D3 + ... + Dn + n*1 (mod 65521)
|
||||||
|
// Where n is the number of bytes and Di is the i-th Byte. We need to change this to account
|
||||||
|
// for the previous values of a and b, as well as treat every input Byte as being 255:
|
||||||
|
// b_inc = n×255 + (n-1)×255 + ... + 255 + n*65520
|
||||||
|
// Or in other words:
|
||||||
|
// b_inc = n*65520 + n(n+1)/2*255
|
||||||
|
// The max chunk size is thus the largest value of n so that b_inc <= 2^32-65521.
|
||||||
|
// 2^32-65521 = n*65520 + n(n+1)/2*255
|
||||||
|
// Plugging this into an equation solver since I can't math gives n = 5552.18..., so 5552.
|
||||||
|
//
|
||||||
|
// On top of the optimization outlined above, the algorithm can also be parallelized with a
|
||||||
|
// bit more work:
|
||||||
|
//
|
||||||
|
// Note that b is a linear combination of a vector of input bytes (D1, ..., Dn).
|
||||||
|
//
|
||||||
|
// If we fix some value k<N and rewrite indices 1, ..., N as
|
||||||
|
//
|
||||||
|
// 1_1, 1_2, ..., 1_k, 2_1, ..., 2_k, ..., (N/k)_k,
|
||||||
|
//
|
||||||
|
// then we can express a and b in terms of sums of smaller sequences kb and ka:
|
||||||
|
//
|
||||||
|
// ka(j) := D1_j + D2_j + ... + D(N/k)_j where j <= k
|
||||||
|
// kb(j) := (N/k)*D1_j + (N/k-1)*D2_j + ... + D(N/k)_j where j <= k
|
||||||
|
//
|
||||||
|
// a = ka(1) + ka(2) + ... + ka(k) + 1
|
||||||
|
// b = k*(kb(1) + kb(2) + ... + kb(k)) - 1*ka(2) - ... - (k-1)*ka(k) + N
|
||||||
|
//
|
||||||
|
// We use this insight to unroll the main loop and process k=4 bytes at a time.
|
||||||
|
// The resulting code is highly amenable to SIMD acceleration, although the immediate speedups
|
||||||
|
// stem from increased pipeline parallelism rather than auto-vectorization.
|
||||||
|
//
|
||||||
|
// This technique is described in-depth (here:)[https://software.intel.com/content/www/us/\
|
||||||
|
// en/develop/articles/fast-computation-of-fletcher-checksums.html]
|
||||||
|
|
||||||
|
const MOD: u32 = 65521;
|
||||||
|
const CHUNK_SIZE: usize = 5552 * 4;
|
||||||
|
|
||||||
|
let mut a = u32::from(self.a);
|
||||||
|
let mut b = u32::from(self.b);
|
||||||
|
let mut a_vec = U32X4([0; 4]);
|
||||||
|
let mut b_vec = a_vec;
|
||||||
|
|
||||||
|
let (bytes, remainder) = bytes.split_at(bytes.len() - bytes.len() % 4);
|
||||||
|
|
||||||
|
// iterate over 4 bytes at a time
|
||||||
|
let chunk_iter = bytes.chunks_exact(CHUNK_SIZE);
|
||||||
|
let remainder_chunk = chunk_iter.remainder();
|
||||||
|
for chunk in chunk_iter {
|
||||||
|
for byte_vec in chunk.chunks_exact(4) {
|
||||||
|
let val = U32X4::from(byte_vec);
|
||||||
|
a_vec += val;
|
||||||
|
b_vec += a_vec;
|
||||||
|
}
|
||||||
|
b += CHUNK_SIZE as u32 * a;
|
||||||
|
a_vec %= MOD;
|
||||||
|
b_vec %= MOD;
|
||||||
|
b %= MOD;
|
||||||
|
}
|
||||||
|
// special-case the final chunk because it may be shorter than the rest
|
||||||
|
for byte_vec in remainder_chunk.chunks_exact(4) {
|
||||||
|
let val = U32X4::from(byte_vec);
|
||||||
|
a_vec += val;
|
||||||
|
b_vec += a_vec;
|
||||||
|
}
|
||||||
|
b += remainder_chunk.len() as u32 * a;
|
||||||
|
a_vec %= MOD;
|
||||||
|
b_vec %= MOD;
|
||||||
|
b %= MOD;
|
||||||
|
|
||||||
|
// combine the sub-sum results into the main sum
|
||||||
|
b_vec *= 4;
|
||||||
|
b_vec.0[1] += MOD - a_vec.0[1];
|
||||||
|
b_vec.0[2] += (MOD - a_vec.0[2]) * 2;
|
||||||
|
b_vec.0[3] += (MOD - a_vec.0[3]) * 3;
|
||||||
|
for &av in a_vec.0.iter() {
|
||||||
|
a += av;
|
||||||
|
}
|
||||||
|
for &bv in b_vec.0.iter() {
|
||||||
|
b += bv;
|
||||||
|
}
|
||||||
|
|
||||||
|
// iterate over the remaining few bytes in serial
|
||||||
|
for &byte in remainder.iter() {
|
||||||
|
a += u32::from(byte);
|
||||||
|
b += a;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.a = (a % MOD) as u16;
|
||||||
|
self.b = (b % MOD) as u16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
struct U32X4([u32; 4]);
|
||||||
|
|
||||||
|
impl U32X4 {
|
||||||
|
fn from(bytes: &[u8]) -> Self {
|
||||||
|
U32X4([
|
||||||
|
u32::from(bytes[0]),
|
||||||
|
u32::from(bytes[1]),
|
||||||
|
u32::from(bytes[2]),
|
||||||
|
u32::from(bytes[3]),
|
||||||
|
])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AddAssign<Self> for U32X4 {
|
||||||
|
fn add_assign(&mut self, other: Self) {
|
||||||
|
for (s, o) in self.0.iter_mut().zip(other.0.iter()) {
|
||||||
|
*s += o;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RemAssign<u32> for U32X4 {
|
||||||
|
fn rem_assign(&mut self, quotient: u32) {
|
||||||
|
for s in self.0.iter_mut() {
|
||||||
|
*s %= quotient;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MulAssign<u32> for U32X4 {
|
||||||
|
fn mul_assign(&mut self, rhs: u32) {
|
||||||
|
for s in self.0.iter_mut() {
|
||||||
|
*s *= rhs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,215 @@
|
||||||
|
//! Adler-32 checksum implementation.
|
||||||
|
//!
|
||||||
|
//! This implementation features:
|
||||||
|
//!
|
||||||
|
//! - Permissively licensed (0BSD) clean-room implementation.
|
||||||
|
//! - Zero dependencies.
|
||||||
|
//! - Decent performance (3-4 GB/s).
|
||||||
|
//! - `#![no_std]` support (with `default-features = false`).
|
||||||
|
|
||||||
|
#![doc(html_root_url = "https://docs.rs/adler/0.2.3")]
|
||||||
|
// Deny a few warnings in doctests, since rustdoc `allow`s many warnings by default
|
||||||
|
#![doc(test(attr(deny(unused_imports, unused_must_use))))]
|
||||||
|
#![cfg_attr(docsrs, feature(doc_cfg))]
|
||||||
|
#![warn(missing_debug_implementations)]
|
||||||
|
#![forbid(unsafe_code)]
|
||||||
|
#![cfg_attr(not(feature = "std"), no_std)]
|
||||||
|
|
||||||
|
#[cfg(not(feature = "std"))]
|
||||||
|
extern crate core as std;
|
||||||
|
|
||||||
|
mod algo;
|
||||||
|
|
||||||
|
use std::hash::Hasher;
|
||||||
|
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
use std::io::{self, BufRead};
|
||||||
|
|
||||||
|
/// Adler-32 checksum calculator.
|
||||||
|
///
|
||||||
|
/// An instance of this type is equivalent to an Adler-32 checksum: It can be created in the default
|
||||||
|
/// state via [`new`] (or the provided `Default` impl), or from a precalculated checksum via
|
||||||
|
/// [`from_checksum`], and the currently stored checksum can be fetched via [`checksum`].
|
||||||
|
///
|
||||||
|
/// This type also implements `Hasher`, which makes it easy to calculate Adler-32 checksums of any
|
||||||
|
/// type that implements or derives `Hash`. This also allows using Adler-32 in a `HashMap`, although
|
||||||
|
/// that is not recommended (while every checksum is a hash, they are not necessarily good at being
|
||||||
|
/// one).
|
||||||
|
///
|
||||||
|
/// [`new`]: #method.new
|
||||||
|
/// [`from_checksum`]: #method.from_checksum
|
||||||
|
/// [`checksum`]: #method.checksum
|
||||||
|
#[derive(Debug, Copy, Clone)]
|
||||||
|
pub struct Adler32 {
|
||||||
|
a: u16,
|
||||||
|
b: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Adler32 {
|
||||||
|
/// Creates a new Adler-32 instance with default state.
|
||||||
|
#[inline]
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates an `Adler32` instance from a precomputed Adler-32 checksum.
|
||||||
|
///
|
||||||
|
/// This allows resuming checksum calculation without having to keep the `Adler32` instance
|
||||||
|
/// around.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use adler::Adler32;
|
||||||
|
/// let parts = [
|
||||||
|
/// "rust",
|
||||||
|
/// "acean",
|
||||||
|
/// ];
|
||||||
|
/// let whole = adler::adler32_slice(b"rustacean");
|
||||||
|
///
|
||||||
|
/// let mut sum = Adler32::new();
|
||||||
|
/// sum.write_slice(parts[0].as_bytes());
|
||||||
|
/// let partial = sum.checksum();
|
||||||
|
///
|
||||||
|
/// // ...later
|
||||||
|
///
|
||||||
|
/// let mut sum = Adler32::from_checksum(partial);
|
||||||
|
/// sum.write_slice(parts[1].as_bytes());
|
||||||
|
/// assert_eq!(sum.checksum(), whole);
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub fn from_checksum(sum: u32) -> Self {
|
||||||
|
Adler32 {
|
||||||
|
a: sum as u16,
|
||||||
|
b: (sum >> 16) as u16,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the calculated checksum at this point in time.
|
||||||
|
#[inline]
|
||||||
|
pub fn checksum(&self) -> u32 {
|
||||||
|
(u32::from(self.b) << 16) | u32::from(self.a)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds `bytes` to the checksum calculation.
|
||||||
|
///
|
||||||
|
/// If efficiency matters, this should be called with Byte slices that contain at least a few
|
||||||
|
/// thousand Bytes.
|
||||||
|
pub fn write_slice(&mut self, bytes: &[u8]) {
|
||||||
|
self.compute(bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Adler32 {
|
||||||
|
#[inline]
|
||||||
|
fn default() -> Self {
|
||||||
|
Adler32 { a: 1, b: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Hasher for Adler32 {
|
||||||
|
#[inline]
|
||||||
|
fn finish(&self) -> u64 {
|
||||||
|
u64::from(self.checksum())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write(&mut self, bytes: &[u8]) {
|
||||||
|
self.write_slice(bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculates the Adler-32 checksum of a byte slice.
|
||||||
|
pub fn adler32_slice(data: &[u8]) -> u32 {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
h.write_slice(data);
|
||||||
|
h.checksum()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculates the Adler-32 checksum of a `BufRead`'s contents.
|
||||||
|
///
|
||||||
|
/// The passed `BufRead` implementor will be read until it reaches EOF.
|
||||||
|
///
|
||||||
|
/// If you only have a `Read` implementor, wrap it in `std::io::BufReader`.
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
|
||||||
|
pub fn adler32_reader<R: BufRead>(reader: &mut R) -> io::Result<u32> {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
loop {
|
||||||
|
let len = {
|
||||||
|
let buf = reader.fill_buf()?;
|
||||||
|
if buf.is_empty() {
|
||||||
|
return Ok(h.checksum());
|
||||||
|
}
|
||||||
|
|
||||||
|
h.write_slice(buf);
|
||||||
|
buf.len()
|
||||||
|
};
|
||||||
|
reader.consume(len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use std::io::BufReader;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn zeroes() {
|
||||||
|
assert_eq!(adler32_slice(&[]), 1);
|
||||||
|
assert_eq!(adler32_slice(&[0]), 1 | 1 << 16);
|
||||||
|
assert_eq!(adler32_slice(&[0, 0]), 1 | 2 << 16);
|
||||||
|
assert_eq!(adler32_slice(&[0; 100]), 0x00640001);
|
||||||
|
assert_eq!(adler32_slice(&[0; 1024]), 0x04000001);
|
||||||
|
assert_eq!(adler32_slice(&[0; 1024 * 1024]), 0x00f00001);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn ones() {
|
||||||
|
assert_eq!(adler32_slice(&[0xff; 1024]), 0x79a6fc2e);
|
||||||
|
assert_eq!(adler32_slice(&[0xff; 1024 * 1024]), 0x8e88ef11);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn mixed() {
|
||||||
|
assert_eq!(adler32_slice(&[1]), 2 | 2 << 16);
|
||||||
|
assert_eq!(adler32_slice(&[40]), 41 | 41 << 16);
|
||||||
|
|
||||||
|
assert_eq!(adler32_slice(&[0xA5; 1024 * 1024]), 0xd5009ab1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
|
||||||
|
#[test]
|
||||||
|
fn wiki() {
|
||||||
|
assert_eq!(adler32_slice(b"Wikipedia"), 0x11E60398);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn resume() {
|
||||||
|
let mut adler = Adler32::new();
|
||||||
|
adler.write_slice(&[0xff; 1024]);
|
||||||
|
let partial = adler.checksum();
|
||||||
|
assert_eq!(partial, 0x79a6fc2e); // from above
|
||||||
|
adler.write_slice(&[0xff; 1024 * 1024 - 1024]);
|
||||||
|
assert_eq!(adler.checksum(), 0x8e88ef11); // from above
|
||||||
|
|
||||||
|
// Make sure that we can resume computing from the partial checksum via `from_checksum`.
|
||||||
|
let mut adler = Adler32::from_checksum(partial);
|
||||||
|
adler.write_slice(&[0xff; 1024 * 1024 - 1024]);
|
||||||
|
assert_eq!(adler.checksum(), 0x8e88ef11); // from above
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bufread() {
|
||||||
|
fn test(data: &[u8], checksum: u32) {
|
||||||
|
// `BufReader` uses an 8 KB buffer, so this will test buffer refilling.
|
||||||
|
let mut buf = BufReader::new(data);
|
||||||
|
let real_sum = adler32_reader(&mut buf).unwrap();
|
||||||
|
assert_eq!(checksum, real_sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
test(&[], 1);
|
||||||
|
test(&[0; 1024], 0x04000001);
|
||||||
|
test(&[0; 1024 * 1024], 0x00f00001);
|
||||||
|
test(&[0xA5; 1024 * 1024], 0xd5009ab1);
|
||||||
|
}
|
||||||
|
}
|
|
@ -1 +0,0 @@
|
||||||
{"files":{"Cargo.toml":"3dfd0367a0af86dd57c4faf9f8a5b1ce8179c38e28d470d3c46ce2d2b45ef20f","LICENSE":"9efeecf73f68ed91830f71c69a53de1328d1f8c6968a68ca6e6b2d6f3a92a088","README.md":"77c9e2080e5ae700403343c27fe08bb616f1df92a8b42b0e7808a7b7d32eb7a2","appveyor.yml":"4873092bae0713890497e5ceae761af359d680e6cce5ce003bf38bc5c45cde44","src/lib.rs":"596ac0c2bbdfa759fb79eb7b7d9e18d6c51be0849f22204a85c4906fe2ae8bde"},"package":"5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"}
|
|
|
@ -1,53 +0,0 @@
|
||||||
"""
|
|
||||||
@generated
|
|
||||||
cargo-raze crate build file.
|
|
||||||
|
|
||||||
DO NOT EDIT! Replaced on runs of cargo-raze
|
|
||||||
"""
|
|
||||||
|
|
||||||
# buildifier: disable=load
|
|
||||||
load(
|
|
||||||
"@io_bazel_rules_rust//rust:rust.bzl",
|
|
||||||
"rust_binary",
|
|
||||||
"rust_library",
|
|
||||||
"rust_test",
|
|
||||||
)
|
|
||||||
|
|
||||||
# buildifier: disable=load
|
|
||||||
load("@bazel_skylib//lib:selects.bzl", "selects")
|
|
||||||
|
|
||||||
package(default_visibility = [
|
|
||||||
# Public for visibility by "@raze__crate__version//" targets.
|
|
||||||
#
|
|
||||||
# Prefer access through "//third_party/cargo", which limits external
|
|
||||||
# visibility to explicit Cargo.toml dependencies.
|
|
||||||
"//visibility:public",
|
|
||||||
])
|
|
||||||
|
|
||||||
licenses([
|
|
||||||
"notice", # Zlib from expression "Zlib"
|
|
||||||
])
|
|
||||||
|
|
||||||
# Generated Targets
|
|
||||||
|
|
||||||
rust_library(
|
|
||||||
name = "adler32",
|
|
||||||
srcs = glob(["**/*.rs"]),
|
|
||||||
crate_features = [
|
|
||||||
],
|
|
||||||
crate_root = "src/lib.rs",
|
|
||||||
crate_type = "lib",
|
|
||||||
data = [],
|
|
||||||
edition = "2015",
|
|
||||||
rustc_flags = [
|
|
||||||
"--cap-lints=allow",
|
|
||||||
],
|
|
||||||
tags = [
|
|
||||||
"cargo-raze",
|
|
||||||
"manual",
|
|
||||||
],
|
|
||||||
version = "1.0.4",
|
|
||||||
# buildifier: leave-alone
|
|
||||||
deps = [
|
|
||||||
],
|
|
||||||
)
|
|
|
@ -1,24 +0,0 @@
|
||||||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
|
||||||
#
|
|
||||||
# When uploading crates to the registry Cargo will automatically
|
|
||||||
# "normalize" Cargo.toml files for maximal compatibility
|
|
||||||
# with all versions of Cargo and also rewrite `path` dependencies
|
|
||||||
# to registry (e.g., crates.io) dependencies
|
|
||||||
#
|
|
||||||
# If you believe there's an error in this file please file an
|
|
||||||
# issue against the rust-lang/cargo repository. If you're
|
|
||||||
# editing this file be aware that the upstream Cargo.toml
|
|
||||||
# will likely look very different (and much more reasonable)
|
|
||||||
|
|
||||||
[package]
|
|
||||||
name = "adler32"
|
|
||||||
version = "1.0.4"
|
|
||||||
authors = ["Remi Rampin <remirampin@gmail.com>"]
|
|
||||||
description = "Minimal Adler32 implementation for Rust."
|
|
||||||
documentation = "https://remram44.github.io/adler32-rs/index.html"
|
|
||||||
readme = "README.md"
|
|
||||||
keywords = ["adler32", "hash", "rolling"]
|
|
||||||
license = "Zlib"
|
|
||||||
repository = "https://github.com/remram44/adler32-rs"
|
|
||||||
[dev-dependencies.rand]
|
|
||||||
version = "0.4"
|
|
|
@ -1,43 +0,0 @@
|
||||||
Copyright notice for the Rust port:
|
|
||||||
|
|
||||||
(C) 2016 Remi Rampin
|
|
||||||
|
|
||||||
This software is provided 'as-is', without any express or implied
|
|
||||||
warranty. In no event will the authors be held liable for any damages
|
|
||||||
arising from the use of this software.
|
|
||||||
|
|
||||||
Permission is granted to anyone to use this software for any purpose,
|
|
||||||
including commercial applications, and to alter it and redistribute it
|
|
||||||
freely, subject to the following restrictions:
|
|
||||||
|
|
||||||
1. The origin of this software must not be misrepresented; you must not
|
|
||||||
claim that you wrote the original software. If you use this software
|
|
||||||
in a product, an acknowledgment in the product documentation would be
|
|
||||||
appreciated but is not required.
|
|
||||||
2. Altered source versions must be plainly marked as such, and must not be
|
|
||||||
misrepresented as being the original software.
|
|
||||||
3. This notice may not be removed or altered from any source distribution.
|
|
||||||
|
|
||||||
|
|
||||||
Copyright notice for the original C code from the zlib project:
|
|
||||||
|
|
||||||
(C) 1995-2017 Jean-loup Gailly and Mark Adler
|
|
||||||
|
|
||||||
This software is provided 'as-is', without any express or implied
|
|
||||||
warranty. In no event will the authors be held liable for any damages
|
|
||||||
arising from the use of this software.
|
|
||||||
|
|
||||||
Permission is granted to anyone to use this software for any purpose,
|
|
||||||
including commercial applications, and to alter it and redistribute it
|
|
||||||
freely, subject to the following restrictions:
|
|
||||||
|
|
||||||
1. The origin of this software must not be misrepresented; you must not
|
|
||||||
claim that you wrote the original software. If you use this software
|
|
||||||
in a product, an acknowledgment in the product documentation would be
|
|
||||||
appreciated but is not required.
|
|
||||||
2. Altered source versions must be plainly marked as such, and must not be
|
|
||||||
misrepresented as being the original software.
|
|
||||||
3. This notice may not be removed or altered from any source distribution.
|
|
||||||
|
|
||||||
Jean-loup Gailly Mark Adler
|
|
||||||
jloup@gzip.org madler@alumni.caltech.edu
|
|
|
@ -1,13 +0,0 @@
|
||||||
[![Build Status](https://travis-ci.org/remram44/adler32-rs.svg?branch=master)](https://travis-ci.org/remram44/adler32-rs/builds)
|
|
||||||
[![Win Build](https://ci.appveyor.com/api/projects/status/ekyg20rd6rwrus64/branch/master?svg=true)](https://ci.appveyor.com/project/remram44/adler32-rs)
|
|
||||||
[![Crates.io](https://img.shields.io/crates/v/adler32.svg)](https://crates.io/crates/adler32)
|
|
||||||
[![Say Thanks!](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/remram44)
|
|
||||||
|
|
||||||
What is this?
|
|
||||||
=============
|
|
||||||
|
|
||||||
It is an implementation of the [Adler32 rolling hash algorithm](https://en.wikipedia.org/wiki/Adler-32) in the [Rust programming language](https://www.rust-lang.org/).
|
|
||||||
|
|
||||||
It is adapted from Jean-Loup Gailly's and Mark Adler's [original implementation in zlib](https://github.com/madler/zlib/blob/2fa463bacfff79181df1a5270fb67cc679a53e71/adler32.c). A copy of the zlib copyright and license can be found in LICENSE-ZLIB.
|
|
||||||
|
|
||||||
[Generated documentation](https://remram44.github.io/adler32-rs/index.html)
|
|
|
@ -1,12 +0,0 @@
|
||||||
install:
|
|
||||||
- ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-nightly-i686-pc-windows-gnu.exe'
|
|
||||||
- rust-nightly-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust"
|
|
||||||
- set PATH=%PATH%;C:\Program Files (x86)\Rust\bin
|
|
||||||
- rustc -V
|
|
||||||
- cargo -V
|
|
||||||
|
|
||||||
build: false
|
|
||||||
|
|
||||||
test_script:
|
|
||||||
- cargo build --verbose
|
|
||||||
- cargo test --verbose
|
|
|
@ -1,307 +0,0 @@
|
||||||
//! A minimal implementation of Adler32 for Rust.
|
|
||||||
//!
|
|
||||||
//! This provides the simple method adler32(), that exhausts a Read and
|
|
||||||
//! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
|
|
||||||
//! build a hash byte-by-byte, allowing to 'forget' past bytes in a rolling
|
|
||||||
//! fashion.
|
|
||||||
//!
|
|
||||||
//! The adler32 code has been translated (as accurately as I could manage) from
|
|
||||||
//! the zlib implementation.
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
extern crate rand;
|
|
||||||
|
|
||||||
use std::io;
|
|
||||||
|
|
||||||
// adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
|
|
||||||
// It was translated into Rust as accurately as I could manage
|
|
||||||
// The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
|
|
||||||
|
|
||||||
/* zlib.h -- interface of the 'zlib' general purpose compression library
|
|
||||||
version 1.2.8, April 28th, 2013
|
|
||||||
|
|
||||||
Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
|
|
||||||
|
|
||||||
This software is provided 'as-is', without any express or implied
|
|
||||||
warranty. In no event will the authors be held liable for any damages
|
|
||||||
arising from the use of this software.
|
|
||||||
|
|
||||||
Permission is granted to anyone to use this software for any purpose,
|
|
||||||
including commercial applications, and to alter it and redistribute it
|
|
||||||
freely, subject to the following restrictions:
|
|
||||||
|
|
||||||
1. The origin of this software must not be misrepresented; you must not
|
|
||||||
claim that you wrote the original software. If you use this software
|
|
||||||
in a product, an acknowledgment in the product documentation would be
|
|
||||||
appreciated but is not required.
|
|
||||||
2. Altered source versions must be plainly marked as such, and must not be
|
|
||||||
misrepresented as being the original software.
|
|
||||||
3. This notice may not be removed or altered from any source distribution.
|
|
||||||
|
|
||||||
Jean-loup Gailly Mark Adler
|
|
||||||
jloup@gzip.org madler@alumni.caltech.edu
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
// largest prime smaller than 65536
|
|
||||||
const BASE: u32 = 65521;
|
|
||||||
|
|
||||||
// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
|
|
||||||
const NMAX: usize = 5552;
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
|
|
||||||
*adler += u32::from(buf[0]);
|
|
||||||
*sum2 += *adler;
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
|
|
||||||
do1(adler, sum2, &buf[0..1]);
|
|
||||||
do1(adler, sum2, &buf[1..2]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
|
|
||||||
do2(adler, sum2, &buf[0..2]);
|
|
||||||
do2(adler, sum2, &buf[2..4]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
|
|
||||||
do4(adler, sum2, &buf[0..4]);
|
|
||||||
do4(adler, sum2, &buf[4..8]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
|
|
||||||
do8(adler, sum2, &buf[0..8]);
|
|
||||||
do8(adler, sum2, &buf[8..16]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A rolling version of the Adler32 hash, which can 'forget' past bytes.
|
|
||||||
///
|
|
||||||
/// Calling remove() will update the hash to the value it would have if that
|
|
||||||
/// past byte had never been fed to the algorithm. This allows you to get the
|
|
||||||
/// hash of a rolling window very efficiently.
|
|
||||||
pub struct RollingAdler32 {
|
|
||||||
a: u32,
|
|
||||||
b: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for RollingAdler32 {
|
|
||||||
fn default() -> RollingAdler32 {
|
|
||||||
RollingAdler32::new()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl RollingAdler32 {
|
|
||||||
/// Creates an empty Adler32 context (with hash 1).
|
|
||||||
pub fn new() -> RollingAdler32 {
|
|
||||||
Self::from_value(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Creates an Adler32 context with the given initial value.
|
|
||||||
pub fn from_value(adler32: u32) -> RollingAdler32 {
|
|
||||||
let a = adler32 & 0xFFFF;
|
|
||||||
let b = adler32 >> 16;
|
|
||||||
RollingAdler32 { a, b }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convenience function initializing a context from the hash of a buffer.
|
|
||||||
pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
|
|
||||||
let mut hash = RollingAdler32::new();
|
|
||||||
hash.update_buffer(buffer);
|
|
||||||
hash
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the current hash.
|
|
||||||
pub fn hash(&self) -> u32 {
|
|
||||||
(self.b << 16) | self.a
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
|
|
||||||
pub fn remove(&mut self, size: usize, byte: u8) {
|
|
||||||
let byte = u32::from(byte);
|
|
||||||
self.a = (self.a + BASE - byte) % BASE;
|
|
||||||
self.b = ((self.b + BASE - 1)
|
|
||||||
.wrapping_add(BASE.wrapping_sub(size as u32)
|
|
||||||
.wrapping_mul(byte))) % BASE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Feeds a new `byte` to the algorithm to update the hash.
|
|
||||||
pub fn update(&mut self, byte: u8) {
|
|
||||||
let byte = u32::from(byte);
|
|
||||||
self.a = (self.a + byte) % BASE;
|
|
||||||
self.b = (self.b + self.a) % BASE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Feeds a vector of bytes to the algorithm to update the hash.
|
|
||||||
pub fn update_buffer(&mut self, buffer: &[u8]) {
|
|
||||||
let len = buffer.len();
|
|
||||||
|
|
||||||
// in case user likes doing a byte at a time, keep it fast
|
|
||||||
if len == 1 {
|
|
||||||
self.update(buffer[0]);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// in case short lengths are provided, keep it somewhat fast
|
|
||||||
if len < 16 {
|
|
||||||
for byte in buffer.iter().take(len) {
|
|
||||||
self.a += u32::from(*byte);
|
|
||||||
self.b += self.a;
|
|
||||||
}
|
|
||||||
if self.a >= BASE {
|
|
||||||
self.a -= BASE;
|
|
||||||
}
|
|
||||||
self.b %= BASE;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut pos = 0;
|
|
||||||
|
|
||||||
// do length NMAX blocks -- requires just one modulo operation;
|
|
||||||
while pos + NMAX <= len {
|
|
||||||
let end = pos + NMAX;
|
|
||||||
while pos < end {
|
|
||||||
// 16 sums unrolled
|
|
||||||
do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
|
|
||||||
pos += 16;
|
|
||||||
}
|
|
||||||
self.a %= BASE;
|
|
||||||
self.b %= BASE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// do remaining bytes (less than NMAX, still just one modulo)
|
|
||||||
if pos < len { // avoid modulos if none remaining
|
|
||||||
while len - pos >= 16 {
|
|
||||||
do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
|
|
||||||
pos += 16;
|
|
||||||
}
|
|
||||||
while len - pos > 0 {
|
|
||||||
self.a += u32::from(buffer[pos]);
|
|
||||||
self.b += self.a;
|
|
||||||
pos += 1;
|
|
||||||
}
|
|
||||||
self.a %= BASE;
|
|
||||||
self.b %= BASE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Consume a Read object and returns the Adler32 hash.
|
|
||||||
pub fn adler32<R: io::Read>(mut reader: R) -> io::Result<u32> {
|
|
||||||
let mut hash = RollingAdler32::new();
|
|
||||||
let mut buffer = [0u8; NMAX];
|
|
||||||
let mut read = try!(reader.read(&mut buffer));
|
|
||||||
while read > 0 {
|
|
||||||
hash.update_buffer(&buffer[..read]);
|
|
||||||
read = try!(reader.read(&mut buffer));
|
|
||||||
}
|
|
||||||
Ok(hash.hash())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod test {
|
|
||||||
use rand;
|
|
||||||
use rand::Rng;
|
|
||||||
use std::io;
|
|
||||||
|
|
||||||
use super::{BASE, adler32, RollingAdler32};
|
|
||||||
|
|
||||||
fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
|
|
||||||
let mut a: u32 = 1;
|
|
||||||
let mut b: u32 = 0;
|
|
||||||
|
|
||||||
for byte in reader.bytes() {
|
|
||||||
let byte = try!(byte) as u32;
|
|
||||||
a = (a + byte) % BASE;
|
|
||||||
b = (b + a) % BASE;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok((b << 16) | a)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn testvectors() {
|
|
||||||
fn do_test(v: u32, bytes: &[u8]) {
|
|
||||||
let mut hash = RollingAdler32::new();
|
|
||||||
hash.update_buffer(&bytes);
|
|
||||||
assert_eq!(hash.hash(), v);
|
|
||||||
|
|
||||||
let r = io::Cursor::new(bytes);
|
|
||||||
assert_eq!(adler32(r).unwrap(), v);
|
|
||||||
}
|
|
||||||
do_test(0x00000001, b"");
|
|
||||||
do_test(0x00620062, b"a");
|
|
||||||
do_test(0x024d0127, b"abc");
|
|
||||||
do_test(0x29750586, b"message digest");
|
|
||||||
do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
|
|
||||||
do_test(0x8adb150c, b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
|
|
||||||
abcdefghijklmnopqrstuvwxyz\
|
|
||||||
0123456789");
|
|
||||||
do_test(0x97b61069, b"1234567890123456789012345678901234567890\
|
|
||||||
1234567890123456789012345678901234567890");
|
|
||||||
do_test(0xD6251498, &[255; 64000]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn compare() {
|
|
||||||
let mut rng = rand::thread_rng();
|
|
||||||
let mut data = vec![0u8; 5589];
|
|
||||||
for size in [0, 1, 3, 4, 5, 31, 32, 33, 67,
|
|
||||||
5550, 5552, 5553, 5568, 5584, 5589].iter().cloned() {
|
|
||||||
rng.fill_bytes(&mut data[..size]);
|
|
||||||
let r1 = io::Cursor::new(&data[..size]);
|
|
||||||
let r2 = r1.clone();
|
|
||||||
if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
|
|
||||||
panic!("Comparison failed, size={}", size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn rolling() {
|
|
||||||
assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
|
|
||||||
|
|
||||||
fn do_test(a: &[u8], b: &[u8]) {
|
|
||||||
let mut total = Vec::with_capacity(a.len() + b.len());
|
|
||||||
total.extend(a);
|
|
||||||
total.extend(b);
|
|
||||||
let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
|
|
||||||
for i in 0..(a.len()) {
|
|
||||||
h.remove(b.len(), a[i]);
|
|
||||||
h.update(total[b.len() + i]);
|
|
||||||
}
|
|
||||||
assert_eq!(h.hash(), adler32(b).unwrap());
|
|
||||||
}
|
|
||||||
do_test(b"a", b"b");
|
|
||||||
do_test(b"", b"this a test");
|
|
||||||
do_test(b"th", b"is a test");
|
|
||||||
do_test(b"this a ", b"test");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn long_window_remove() {
|
|
||||||
let mut hash = RollingAdler32::new();
|
|
||||||
let w = 65536;
|
|
||||||
assert!(w as u32 > BASE);
|
|
||||||
|
|
||||||
let mut bytes = vec![0; w*3];
|
|
||||||
for (i, b) in bytes.iter_mut().enumerate() {
|
|
||||||
*b = i as u8;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i, b) in bytes.iter().enumerate() {
|
|
||||||
if i >= w {
|
|
||||||
hash.remove(w, bytes[i - w]);
|
|
||||||
}
|
|
||||||
hash.update(*b);
|
|
||||||
if i > 0 && i % w == 0 {
|
|
||||||
assert_eq!(hash.hash(), 0x433a8772);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert_eq!(hash.hash(), 0xbbba8772);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1 @@
|
||||||
|
{"files":{"Cargo.toml":"9e33701960053fa4c3fe2e5700bdd1fc17c6a7ff3d1cd617cb4f23cc01123a0c","LICENSE":"f5af8beef8f5f88f1b78494703bbfa019c4f3630ac111344390d6f9975ab22ed","README.md":"022d9b80f7ecec822a9f005f311d990f94a061970e7b982c85978675ff48de17","src/bench.rs":"bf3353d119660f44e4c2ef06d34c74e9585984cd7a82df609d51250476bdf2d0","src/lib.rs":"4f203fd48b12052f950213249a55db0b1c6cde93fe3bdf26b70a6eb42c6c9dee"},"package":"aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"}
|
|
@ -0,0 +1,57 @@
|
||||||
|
"""
|
||||||
|
@generated
|
||||||
|
cargo-raze crate build file.
|
||||||
|
|
||||||
|
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||||
|
"""
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load(
|
||||||
|
"@io_bazel_rules_rust//rust:rust.bzl",
|
||||||
|
"rust_binary",
|
||||||
|
"rust_library",
|
||||||
|
"rust_test",
|
||||||
|
)
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load("@bazel_skylib//lib:selects.bzl", "selects")
|
||||||
|
|
||||||
|
package(default_visibility = [
|
||||||
|
# Public for visibility by "@raze__crate__version//" targets.
|
||||||
|
#
|
||||||
|
# Prefer access through "//third_party/cargo", which limits external
|
||||||
|
# visibility to explicit Cargo.toml dependencies.
|
||||||
|
"//visibility:public",
|
||||||
|
])
|
||||||
|
|
||||||
|
licenses([
|
||||||
|
"notice", # Zlib from expression "Zlib"
|
||||||
|
])
|
||||||
|
|
||||||
|
# Generated Targets
|
||||||
|
|
||||||
|
# Unsupported target "bench" with type "bench" omitted
|
||||||
|
|
||||||
|
rust_library(
|
||||||
|
name = "adler32",
|
||||||
|
srcs = glob(["**/*.rs"]),
|
||||||
|
crate_features = [
|
||||||
|
"default",
|
||||||
|
"std",
|
||||||
|
],
|
||||||
|
crate_root = "src/lib.rs",
|
||||||
|
crate_type = "lib",
|
||||||
|
data = [],
|
||||||
|
edition = "2018",
|
||||||
|
rustc_flags = [
|
||||||
|
"--cap-lints=allow",
|
||||||
|
],
|
||||||
|
tags = [
|
||||||
|
"cargo-raze",
|
||||||
|
"manual",
|
||||||
|
],
|
||||||
|
version = "1.2.0",
|
||||||
|
# buildifier: leave-alone
|
||||||
|
deps = [
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,61 @@
|
||||||
|
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||||
|
#
|
||||||
|
# When uploading crates to the registry Cargo will automatically
|
||||||
|
# "normalize" Cargo.toml files for maximal compatibility
|
||||||
|
# with all versions of Cargo and also rewrite `path` dependencies
|
||||||
|
# to registry (e.g., crates.io) dependencies
|
||||||
|
#
|
||||||
|
# If you believe there's an error in this file please file an
|
||||||
|
# issue against the rust-lang/cargo repository. If you're
|
||||||
|
# editing this file be aware that the upstream Cargo.toml
|
||||||
|
# will likely look very different (and much more reasonable)
|
||||||
|
|
||||||
|
[package]
|
||||||
|
edition = "2018"
|
||||||
|
name = "adler32"
|
||||||
|
version = "1.2.0"
|
||||||
|
authors = ["Remi Rampin <remirampin@gmail.com>"]
|
||||||
|
description = "Minimal Adler32 implementation for Rust."
|
||||||
|
documentation = "https://docs.rs/adler32/"
|
||||||
|
readme = "README.md"
|
||||||
|
keywords = ["adler32", "hash", "rolling"]
|
||||||
|
license = "Zlib"
|
||||||
|
repository = "https://github.com/remram44/adler32-rs"
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
bench = false
|
||||||
|
|
||||||
|
[[bench]]
|
||||||
|
name = "bench"
|
||||||
|
path = "src/bench.rs"
|
||||||
|
harness = false
|
||||||
|
[dependencies.compiler_builtins]
|
||||||
|
version = "0.1.2"
|
||||||
|
optional = true
|
||||||
|
|
||||||
|
[dependencies.core]
|
||||||
|
version = "1.0.0"
|
||||||
|
optional = true
|
||||||
|
package = "rustc-std-workspace-core"
|
||||||
|
[dev-dependencies.criterion]
|
||||||
|
version = "0.3"
|
||||||
|
|
||||||
|
[dev-dependencies.getrandom]
|
||||||
|
version = "0.1"
|
||||||
|
features = ["wasm-bindgen"]
|
||||||
|
|
||||||
|
[dev-dependencies.humansize]
|
||||||
|
version = "1.1"
|
||||||
|
|
||||||
|
[dev-dependencies.rand]
|
||||||
|
version = "0.7"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["std"]
|
||||||
|
rustc-dep-of-std = ["core", "compiler_builtins"]
|
||||||
|
std = []
|
||||||
|
[target."cfg(target_arch = \"wasm32\")".dev-dependencies.wasm-bindgen]
|
||||||
|
version = "0.2.63"
|
||||||
|
|
||||||
|
[target."cfg(target_arch = \"wasm32\")".dev-dependencies.wasm-bindgen-test]
|
||||||
|
version = "0.3"
|
|
@ -0,0 +1,43 @@
|
||||||
|
Copyright notice for the Rust port:
|
||||||
|
|
||||||
|
(C) 2016 Remi Rampin and adler32-rs contributors
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied
|
||||||
|
warranty. In no event will the authors be held liable for any damages
|
||||||
|
arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it
|
||||||
|
freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not
|
||||||
|
claim that you wrote the original software. If you use this software
|
||||||
|
in a product, an acknowledgment in the product documentation would be
|
||||||
|
appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright notice for the original C code from the zlib project:
|
||||||
|
|
||||||
|
(C) 1995-2017 Jean-loup Gailly and Mark Adler
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied
|
||||||
|
warranty. In no event will the authors be held liable for any damages
|
||||||
|
arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it
|
||||||
|
freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not
|
||||||
|
claim that you wrote the original software. If you use this software
|
||||||
|
in a product, an acknowledgment in the product documentation would be
|
||||||
|
appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
|
||||||
|
Jean-loup Gailly Mark Adler
|
||||||
|
jloup@gzip.org madler@alumni.caltech.edu
|
|
@ -0,0 +1,17 @@
|
||||||
|
[![Build Status](https://github.com/remram44/adler32-rs/workflows/Test/badge.svg)](https://github.com/remram44/adler32-rs/actions)
|
||||||
|
[![Win Build](https://ci.appveyor.com/api/projects/status/ekyg20rd6rwrus64/branch/master?svg=true)](https://ci.appveyor.com/project/remram44/adler32-rs)
|
||||||
|
[![Crates.io](https://img.shields.io/crates/v/adler32.svg)](https://crates.io/crates/adler32)
|
||||||
|
[![Documentation](https://docs.rs/adler32/badge.svg)](https://docs.rs/adler32)
|
||||||
|
[![License](https://img.shields.io/crates/l/adler32.svg)](https://github.com/remram44/adler32-rs/blob/master/LICENSE)
|
||||||
|
|
||||||
|
What is this?
|
||||||
|
=============
|
||||||
|
|
||||||
|
It is an implementation of the [Adler32 rolling hash algorithm](https://en.wikipedia.org/wiki/Adler-32) in the [Rust programming language](https://www.rust-lang.org/).
|
||||||
|
|
||||||
|
It is adapted from Jean-Loup Gailly's and Mark Adler's [original implementation in zlib](https://github.com/madler/zlib/blob/2fa463bacfff79181df1a5270fb67cc679a53e71/adler32.c).
|
||||||
|
|
||||||
|
|
||||||
|
#### Minimum Supported Version of Rust (MSRV)
|
||||||
|
|
||||||
|
`adler32-rs` can be built with Rust version 1.33 or later. This version may be raised in the future but that will be accompanied by a minor version increase.
|
|
@ -0,0 +1,30 @@
|
||||||
|
use adler32::RollingAdler32;
|
||||||
|
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||||
|
use humansize::{file_size_opts, FileSize};
|
||||||
|
use rand::Rng;
|
||||||
|
|
||||||
|
fn bench_update_buffer(c: &mut Criterion) {
|
||||||
|
let mut rng = rand::thread_rng();
|
||||||
|
let mut group = c.benchmark_group("update_buffer");
|
||||||
|
for &size in [512, 100 * 1024].iter() {
|
||||||
|
let mut adler = RollingAdler32::new();
|
||||||
|
let formatted_size = size.file_size(file_size_opts::BINARY).unwrap();
|
||||||
|
let in_bytes = {
|
||||||
|
let mut in_bytes = vec![0u8; size];
|
||||||
|
rng.fill(&mut in_bytes[..]);
|
||||||
|
in_bytes
|
||||||
|
};
|
||||||
|
|
||||||
|
group.throughput(Throughput::Bytes(size as u64));
|
||||||
|
group.bench_with_input(
|
||||||
|
BenchmarkId::from_parameter(formatted_size),
|
||||||
|
&in_bytes,
|
||||||
|
|b, data| {
|
||||||
|
b.iter(|| adler.update_buffer(data));
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(bench_default, bench_update_buffer);
|
||||||
|
criterion_main!(bench_default);
|
|
@ -0,0 +1,324 @@
|
||||||
|
//! A minimal implementation of Adler32 for Rust.
|
||||||
|
//!
|
||||||
|
//! This provides the simple method adler32(), that exhausts a Read and
|
||||||
|
//! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
|
||||||
|
//! build a hash byte-by-byte, allowing to 'forget' past bytes in a rolling
|
||||||
|
//! fashion.
|
||||||
|
//!
|
||||||
|
//! The adler32 code has been translated (as accurately as I could manage) from
|
||||||
|
//! the zlib implementation.
|
||||||
|
|
||||||
|
#![forbid(unsafe_code)]
|
||||||
|
#![cfg_attr(not(feature = "std"), no_std)]
|
||||||
|
|
||||||
|
|
||||||
|
// adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
|
||||||
|
// It was translated into Rust as accurately as I could manage
|
||||||
|
// The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
|
||||||
|
|
||||||
|
/* zlib.h -- interface of the 'zlib' general purpose compression library
|
||||||
|
version 1.2.8, April 28th, 2013
|
||||||
|
|
||||||
|
Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied
|
||||||
|
warranty. In no event will the authors be held liable for any damages
|
||||||
|
arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it
|
||||||
|
freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not
|
||||||
|
claim that you wrote the original software. If you use this software
|
||||||
|
in a product, an acknowledgment in the product documentation would be
|
||||||
|
appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
|
||||||
|
Jean-loup Gailly Mark Adler
|
||||||
|
jloup@gzip.org madler@alumni.caltech.edu
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
// largest prime smaller than 65536
|
||||||
|
const BASE: u32 = 65521;
|
||||||
|
|
||||||
|
// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
|
||||||
|
const NMAX: usize = 5552;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
|
||||||
|
*adler += u32::from(buf[0]);
|
||||||
|
*sum2 += *adler;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
|
||||||
|
do1(adler, sum2, &buf[0..1]);
|
||||||
|
do1(adler, sum2, &buf[1..2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
|
||||||
|
do2(adler, sum2, &buf[0..2]);
|
||||||
|
do2(adler, sum2, &buf[2..4]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
|
||||||
|
do4(adler, sum2, &buf[0..4]);
|
||||||
|
do4(adler, sum2, &buf[4..8]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
|
||||||
|
do8(adler, sum2, &buf[0..8]);
|
||||||
|
do8(adler, sum2, &buf[8..16]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A rolling version of the Adler32 hash, which can 'forget' past bytes.
|
||||||
|
///
|
||||||
|
/// Calling remove() will update the hash to the value it would have if that
|
||||||
|
/// past byte had never been fed to the algorithm. This allows you to get the
|
||||||
|
/// hash of a rolling window very efficiently.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct RollingAdler32 {
|
||||||
|
a: u32,
|
||||||
|
b: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for RollingAdler32 {
|
||||||
|
fn default() -> RollingAdler32 {
|
||||||
|
RollingAdler32::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RollingAdler32 {
|
||||||
|
/// Creates an empty Adler32 context (with hash 1).
|
||||||
|
pub fn new() -> RollingAdler32 {
|
||||||
|
Self::from_value(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates an Adler32 context with the given initial value.
|
||||||
|
pub fn from_value(adler32: u32) -> RollingAdler32 {
|
||||||
|
let a = adler32 & 0xFFFF;
|
||||||
|
let b = adler32 >> 16;
|
||||||
|
RollingAdler32 { a, b }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convenience function initializing a context from the hash of a buffer.
|
||||||
|
pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
|
||||||
|
let mut hash = RollingAdler32::new();
|
||||||
|
hash.update_buffer(buffer);
|
||||||
|
hash
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the current hash.
|
||||||
|
pub fn hash(&self) -> u32 {
|
||||||
|
(self.b << 16) | self.a
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
|
||||||
|
pub fn remove(&mut self, size: usize, byte: u8) {
|
||||||
|
let byte = u32::from(byte);
|
||||||
|
self.a = (self.a + BASE - byte) % BASE;
|
||||||
|
self.b = ((self.b + BASE - 1)
|
||||||
|
.wrapping_add(BASE.wrapping_sub(size as u32).wrapping_mul(byte)))
|
||||||
|
% BASE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Feeds a new `byte` to the algorithm to update the hash.
|
||||||
|
pub fn update(&mut self, byte: u8) {
|
||||||
|
let byte = u32::from(byte);
|
||||||
|
self.a = (self.a + byte) % BASE;
|
||||||
|
self.b = (self.b + self.a) % BASE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Feeds a vector of bytes to the algorithm to update the hash.
|
||||||
|
pub fn update_buffer(&mut self, buffer: &[u8]) {
|
||||||
|
let len = buffer.len();
|
||||||
|
|
||||||
|
// in case user likes doing a byte at a time, keep it fast
|
||||||
|
if len == 1 {
|
||||||
|
self.update(buffer[0]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// in case short lengths are provided, keep it somewhat fast
|
||||||
|
if len < 16 {
|
||||||
|
for byte in buffer.iter().take(len) {
|
||||||
|
self.a += u32::from(*byte);
|
||||||
|
self.b += self.a;
|
||||||
|
}
|
||||||
|
if self.a >= BASE {
|
||||||
|
self.a -= BASE;
|
||||||
|
}
|
||||||
|
self.b %= BASE;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut pos = 0;
|
||||||
|
|
||||||
|
// do length NMAX blocks -- requires just one modulo operation;
|
||||||
|
while pos + NMAX <= len {
|
||||||
|
let end = pos + NMAX;
|
||||||
|
while pos < end {
|
||||||
|
// 16 sums unrolled
|
||||||
|
do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
|
||||||
|
pos += 16;
|
||||||
|
}
|
||||||
|
self.a %= BASE;
|
||||||
|
self.b %= BASE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// do remaining bytes (less than NMAX, still just one modulo)
|
||||||
|
if pos < len {
|
||||||
|
// avoid modulos if none remaining
|
||||||
|
while len - pos >= 16 {
|
||||||
|
do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
|
||||||
|
pos += 16;
|
||||||
|
}
|
||||||
|
while len - pos > 0 {
|
||||||
|
self.a += u32::from(buffer[pos]);
|
||||||
|
self.b += self.a;
|
||||||
|
pos += 1;
|
||||||
|
}
|
||||||
|
self.a %= BASE;
|
||||||
|
self.b %= BASE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Consume a Read object and returns the Adler32 hash.
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
pub fn adler32<R: std::io::Read>(mut reader: R) -> std::io::Result<u32> {
|
||||||
|
let mut hash = RollingAdler32::new();
|
||||||
|
let mut buffer = [0u8; NMAX];
|
||||||
|
let mut read = reader.read(&mut buffer)?;
|
||||||
|
while read > 0 {
|
||||||
|
hash.update_buffer(&buffer[..read]);
|
||||||
|
read = reader.read(&mut buffer)?;
|
||||||
|
}
|
||||||
|
Ok(hash.hash())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use rand::Rng;
|
||||||
|
use std::io;
|
||||||
|
#[cfg(target_arch = "wasm32")]
|
||||||
|
use wasm_bindgen_test::wasm_bindgen_test;
|
||||||
|
|
||||||
|
use super::{adler32, RollingAdler32, BASE};
|
||||||
|
|
||||||
|
fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
|
||||||
|
let mut a: u32 = 1;
|
||||||
|
let mut b: u32 = 0;
|
||||||
|
|
||||||
|
for byte in reader.bytes() {
|
||||||
|
let byte = byte? as u32;
|
||||||
|
a = (a + byte) % BASE;
|
||||||
|
b = (b + a) % BASE;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((b << 16) | a)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||||
|
fn testvectors() {
|
||||||
|
fn do_test(v: u32, bytes: &[u8]) {
|
||||||
|
let mut hash = RollingAdler32::new();
|
||||||
|
hash.update_buffer(&bytes);
|
||||||
|
assert_eq!(hash.hash(), v);
|
||||||
|
|
||||||
|
let r = io::Cursor::new(bytes);
|
||||||
|
assert_eq!(adler32(r).unwrap(), v);
|
||||||
|
}
|
||||||
|
do_test(0x00000001, b"");
|
||||||
|
do_test(0x00620062, b"a");
|
||||||
|
do_test(0x024d0127, b"abc");
|
||||||
|
do_test(0x29750586, b"message digest");
|
||||||
|
do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
|
||||||
|
do_test(
|
||||||
|
0x8adb150c,
|
||||||
|
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
|
||||||
|
abcdefghijklmnopqrstuvwxyz\
|
||||||
|
0123456789",
|
||||||
|
);
|
||||||
|
do_test(
|
||||||
|
0x97b61069,
|
||||||
|
b"1234567890123456789012345678901234567890\
|
||||||
|
1234567890123456789012345678901234567890",
|
||||||
|
);
|
||||||
|
do_test(0xD6251498, &[255; 64000]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||||
|
fn compare() {
|
||||||
|
let mut rng = rand::thread_rng();
|
||||||
|
let mut data = vec![0u8; 5589];
|
||||||
|
for size in [
|
||||||
|
0, 1, 3, 4, 5, 31, 32, 33, 67, 5550, 5552, 5553, 5568, 5584, 5589,
|
||||||
|
]
|
||||||
|
.iter()
|
||||||
|
.cloned()
|
||||||
|
{
|
||||||
|
rng.fill(&mut data[..size]);
|
||||||
|
let r1 = io::Cursor::new(&data[..size]);
|
||||||
|
let r2 = r1.clone();
|
||||||
|
if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
|
||||||
|
panic!("Comparison failed, size={}", size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||||
|
fn rolling() {
|
||||||
|
assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
|
||||||
|
|
||||||
|
fn do_test(a: &[u8], b: &[u8]) {
|
||||||
|
let mut total = Vec::with_capacity(a.len() + b.len());
|
||||||
|
total.extend(a);
|
||||||
|
total.extend(b);
|
||||||
|
let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
|
||||||
|
for i in 0..(a.len()) {
|
||||||
|
h.remove(b.len(), a[i]);
|
||||||
|
h.update(total[b.len() + i]);
|
||||||
|
}
|
||||||
|
assert_eq!(h.hash(), adler32(b).unwrap());
|
||||||
|
}
|
||||||
|
do_test(b"a", b"b");
|
||||||
|
do_test(b"", b"this a test");
|
||||||
|
do_test(b"th", b"is a test");
|
||||||
|
do_test(b"this a ", b"test");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||||
|
fn long_window_remove() {
|
||||||
|
let mut hash = RollingAdler32::new();
|
||||||
|
let w = 65536;
|
||||||
|
assert!(w as u32 > BASE);
|
||||||
|
|
||||||
|
let mut bytes = vec![0; w * 3];
|
||||||
|
for (i, b) in bytes.iter_mut().enumerate() {
|
||||||
|
*b = i as u8;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i, b) in bytes.iter().enumerate() {
|
||||||
|
if i >= w {
|
||||||
|
hash.remove(w, bytes[i - w]);
|
||||||
|
}
|
||||||
|
hash.update(*b);
|
||||||
|
if i > 0 && i % w == 0 {
|
||||||
|
assert_eq!(hash.hash(), 0x433a8772);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert_eq!(hash.hash(), 0xbbba8772);
|
||||||
|
}
|
||||||
|
}
|
|
@ -1 +0,0 @@
|
||||||
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"bf3140f591d5e16e2c178bfdc39bc2ea9fecf3b50963ff60343d3e5a68d024cc","DESIGN.md":"9065f33d818d1562244d36dc4781e2a351108030cee17f11c2ba512ca7b4c27e","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"f679a3a8fa99694e00e2ed8ec9cd6f7dc28eee36f47c472411744aabb0556d0b","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"46c57a83a75a8f25fdf19a15deae10748d12b8af9445ae74700a546a92024608","src/automaton.rs":"ea3fc2648e026eac9f9969b0d457e49af7b4a40044379ce010d054f22afbc98f","src/buffer.rs":"0641828d1058b9c1c16e8e2445ce05c94b0ad2d97736a7e3cd8b913fa8edd7fd","src/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/classes.rs":"166c9f15c9a2e370e2bc9a9e1620bb2db13df52edfde9a0db1f20144519a7e72","src/dfa.rs":"e34f485a7c3257d2edee16fcdb2a5586aa0d2aa9e34d624288eb2b5a0f7cc65b","src/error.rs":"36dbf2cefbfaa8a69186551320dbff023d3e82780a6c925e87c3e3997b967e66","src/lib.rs":"a2a65d72cbe1eed1964c3fb080e5fa54245ab208a3c855531c1036f05e073452","src/nfa.rs":"6e8fe7633033c378e5487604a2772af3fc2eca011fe374fe0b6d6cee98198f58","src/packed/api.rs":"6c65dfa177b7d7b79f90a048f260bec7f817126c693b85f49704c7d2ecf5f646","src/packed/mod.rs":"29c76ad3cbb1f831140cefac7a27fb504ac4af4f454975a571965b48aad417eb","src/packed/pattern.rs":"b88c57af057997da0a5a06f4c5604a7e598c20acfc11c15cd8977727f6e1cf9c","src/packed/rabinkarp.rs":"b3242a8631ea5607163dcbb641e4ac9c6da26774378da1e51651b0ab5656b390","src/packed/teddy/README.md":"5819f40d221af93288e705eadef5393a41d7a0900881b4d676e01fd65d5adf15","src/packed/teddy/compile.rs":"5d7de6a45a84bb2322647a6de7a7b1573837b9222b16e348f023b8d47e0a5130","src/packed/teddy/mod.rs":"f63db3419b1d378929bf0bc1f0e3b909ff3c38b9f2b6e86ba4546b8f39907cd3","src/packed/teddy/runtime.rs":"0a
1250ea73159b3be6e0fa9a3f55ecedbb2cb90cb798d1709e9f5ee48f8855d5","src/packed/tests.rs":"0b52ab9eef73a1a4f141f475a9fa98e54d447104aa69acba3a7f8248ce7164b2","src/packed/vector.rs":"ab3c0535fca5f09198d58cbfae44c292aeb3ce44bc92bca36d30dc72963639fc","src/prefilter.rs":"f615e929629f9356fb779a4456a0b6b1ee139960029df71d41620bf3fed9282d","src/state_id.rs":"50958ca2b089d775fb4e49a64950e2f1e8a4af1772fe782ae3715a7745dcc6d7","src/tests.rs":"7458d220c78bbc382c1332e0a222f7e47b6b8ff1fac666d46db4c3a9e63cef4c"},"package":"8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada"}
|
|
|
@ -1,56 +0,0 @@
|
||||||
"""
|
|
||||||
@generated
|
|
||||||
cargo-raze crate build file.
|
|
||||||
|
|
||||||
DO NOT EDIT! Replaced on runs of cargo-raze
|
|
||||||
"""
|
|
||||||
|
|
||||||
# buildifier: disable=load
|
|
||||||
load(
|
|
||||||
"@io_bazel_rules_rust//rust:rust.bzl",
|
|
||||||
"rust_binary",
|
|
||||||
"rust_library",
|
|
||||||
"rust_test",
|
|
||||||
)
|
|
||||||
|
|
||||||
# buildifier: disable=load
|
|
||||||
load("@bazel_skylib//lib:selects.bzl", "selects")
|
|
||||||
|
|
||||||
package(default_visibility = [
|
|
||||||
# Public for visibility by "@raze__crate__version//" targets.
|
|
||||||
#
|
|
||||||
# Prefer access through "//third_party/cargo", which limits external
|
|
||||||
# visibility to explicit Cargo.toml dependencies.
|
|
||||||
"//visibility:public",
|
|
||||||
])
|
|
||||||
|
|
||||||
licenses([
|
|
||||||
"unencumbered", # Unlicense from expression "Unlicense OR MIT"
|
|
||||||
])
|
|
||||||
|
|
||||||
# Generated Targets
|
|
||||||
|
|
||||||
rust_library(
|
|
||||||
name = "aho_corasick",
|
|
||||||
srcs = glob(["**/*.rs"]),
|
|
||||||
crate_features = [
|
|
||||||
"default",
|
|
||||||
"std",
|
|
||||||
],
|
|
||||||
crate_root = "src/lib.rs",
|
|
||||||
crate_type = "lib",
|
|
||||||
data = [],
|
|
||||||
edition = "2015",
|
|
||||||
rustc_flags = [
|
|
||||||
"--cap-lints=allow",
|
|
||||||
],
|
|
||||||
tags = [
|
|
||||||
"cargo-raze",
|
|
||||||
"manual",
|
|
||||||
],
|
|
||||||
version = "0.7.10",
|
|
||||||
# buildifier: leave-alone
|
|
||||||
deps = [
|
|
||||||
"//third_party/cargo/vendor/memchr-2.3.3:memchr",
|
|
||||||
],
|
|
||||||
)
|
|
|
@ -1,42 +0,0 @@
|
||||||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
|
||||||
#
|
|
||||||
# When uploading crates to the registry Cargo will automatically
|
|
||||||
# "normalize" Cargo.toml files for maximal compatibility
|
|
||||||
# with all versions of Cargo and also rewrite `path` dependencies
|
|
||||||
# to registry (e.g., crates.io) dependencies
|
|
||||||
#
|
|
||||||
# If you believe there's an error in this file please file an
|
|
||||||
# issue against the rust-lang/cargo repository. If you're
|
|
||||||
# editing this file be aware that the upstream Cargo.toml
|
|
||||||
# will likely look very different (and much more reasonable)
|
|
||||||
|
|
||||||
[package]
|
|
||||||
name = "aho-corasick"
|
|
||||||
version = "0.7.10"
|
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
|
||||||
exclude = ["/aho-corasick-debug", "/ci/*", "/.travis.yml", "/appveyor.yml"]
|
|
||||||
autotests = false
|
|
||||||
description = "Fast multiple substring searching."
|
|
||||||
homepage = "https://github.com/BurntSushi/aho-corasick"
|
|
||||||
readme = "README.md"
|
|
||||||
keywords = ["string", "search", "text", "aho", "multi"]
|
|
||||||
categories = ["text-processing"]
|
|
||||||
license = "Unlicense/MIT"
|
|
||||||
repository = "https://github.com/BurntSushi/aho-corasick"
|
|
||||||
[profile.bench]
|
|
||||||
debug = true
|
|
||||||
|
|
||||||
[profile.release]
|
|
||||||
debug = true
|
|
||||||
|
|
||||||
[lib]
|
|
||||||
name = "aho_corasick"
|
|
||||||
[dependencies.memchr]
|
|
||||||
version = "2.2.0"
|
|
||||||
default-features = false
|
|
||||||
[dev-dependencies.doc-comment]
|
|
||||||
version = "0.3.1"
|
|
||||||
|
|
||||||
[features]
|
|
||||||
default = ["std"]
|
|
||||||
std = ["memchr/use_std"]
|
|
|
@ -1,186 +0,0 @@
|
||||||
aho-corasick
|
|
||||||
============
|
|
||||||
A library for finding occurrences of many patterns at once with SIMD
|
|
||||||
acceleration in some cases. This library provides multiple pattern
|
|
||||||
search principally through an implementation of the
|
|
||||||
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
|
|
||||||
which builds a finite state machine for executing searches in linear time.
|
|
||||||
Features include case insensitive matching, overlapping matches and search &
|
|
||||||
replace in streams.
|
|
||||||
|
|
||||||
[![Build status](https://github.com/BurntSushi/aho-corasick/workflows/ci/badge.svg)](https://github.com/BurntSushi/aho-corasick/actions)
|
|
||||||
[![](http://meritbadge.herokuapp.com/aho-corasick)](https://crates.io/crates/aho-corasick)
|
|
||||||
|
|
||||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
|
||||||
|
|
||||||
|
|
||||||
### Documentation
|
|
||||||
|
|
||||||
https://docs.rs/aho-corasick
|
|
||||||
|
|
||||||
|
|
||||||
### Usage
|
|
||||||
|
|
||||||
Add this to your `Cargo.toml`:
|
|
||||||
|
|
||||||
```toml
|
|
||||||
[dependencies]
|
|
||||||
aho-corasick = "0.7"
|
|
||||||
```
|
|
||||||
|
|
||||||
and this to your crate root (if you're using Rust 2015):
|
|
||||||
|
|
||||||
```rust
|
|
||||||
extern crate aho_corasick;
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
### Example: basic searching
|
|
||||||
|
|
||||||
This example shows how to search for occurrences of multiple patterns
|
|
||||||
simultaneously. Each match includes the pattern that matched along with the
|
|
||||||
byte offsets of the match.
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use aho_corasick::AhoCorasick;
|
|
||||||
|
|
||||||
let patterns = &["apple", "maple", "Snapple"];
|
|
||||||
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
|
||||||
|
|
||||||
let ac = AhoCorasick::new(patterns);
|
|
||||||
let mut matches = vec![];
|
|
||||||
for mat in ac.find_iter(haystack) {
|
|
||||||
matches.push((mat.pattern(), mat.start(), mat.end()));
|
|
||||||
}
|
|
||||||
assert_eq!(matches, vec![
|
|
||||||
(1, 13, 18),
|
|
||||||
(0, 28, 33),
|
|
||||||
(2, 43, 50),
|
|
||||||
]);
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
### Example: case insensitivity
|
|
||||||
|
|
||||||
This is like the previous example, but matches `Snapple` case insensitively
|
|
||||||
using `AhoCorasickBuilder`:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use aho_corasick::AhoCorasickBuilder;
|
|
||||||
|
|
||||||
let patterns = &["apple", "maple", "snapple"];
|
|
||||||
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
|
||||||
|
|
||||||
let ac = AhoCorasickBuilder::new()
|
|
||||||
.ascii_case_insensitive(true)
|
|
||||||
.build(patterns);
|
|
||||||
let mut matches = vec![];
|
|
||||||
for mat in ac.find_iter(haystack) {
|
|
||||||
matches.push((mat.pattern(), mat.start(), mat.end()));
|
|
||||||
}
|
|
||||||
assert_eq!(matches, vec![
|
|
||||||
(1, 13, 18),
|
|
||||||
(0, 28, 33),
|
|
||||||
(2, 43, 50),
|
|
||||||
]);
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
### Example: replacing matches in a stream
|
|
||||||
|
|
||||||
This example shows how to execute a search and replace on a stream without
|
|
||||||
loading the entire stream into memory first.
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use aho_corasick::AhoCorasick;
|
|
||||||
|
|
||||||
let patterns = &["fox", "brown", "quick"];
|
|
||||||
let replace_with = &["sloth", "grey", "slow"];
|
|
||||||
|
|
||||||
// In a real example, these might be `std::fs::File`s instead. All you need to
|
|
||||||
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
|
|
||||||
let rdr = "The quick brown fox.";
|
|
||||||
let mut wtr = vec![];
|
|
||||||
|
|
||||||
let ac = AhoCorasick::new(patterns);
|
|
||||||
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
|
|
||||||
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
### Example: finding the leftmost first match
|
|
||||||
|
|
||||||
In the textbook description of Aho-Corasick, its formulation is typically
|
|
||||||
structured such that it reports all possible matches, even when they overlap
|
|
||||||
with another. In many cases, overlapping matches may not be desired, such as
|
|
||||||
the case of finding all successive non-overlapping matches like you might with
|
|
||||||
a standard regular expression.
|
|
||||||
|
|
||||||
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
|
|
||||||
this doesn't always work in the expected way, since it will report matches as
|
|
||||||
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
|
|
||||||
against the text `Samwise`. Most regex engines (that are Perl-like, or
|
|
||||||
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
|
|
||||||
algorithm modified for reporting non-overlapping matches will report `Sam`.
|
|
||||||
|
|
||||||
A novel contribution of this library is the ability to change the match
|
|
||||||
semantics of Aho-Corasick (without additional search time overhead) such that
|
|
||||||
`Samwise` is reported instead. For example, here's the standard approach:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use aho_corasick::AhoCorasick;
|
|
||||||
|
|
||||||
let patterns = &["Samwise", "Sam"];
|
|
||||||
let haystack = "Samwise";
|
|
||||||
|
|
||||||
let ac = AhoCorasick::new(patterns);
|
|
||||||
let mat = ac.find(haystack).expect("should have a match");
|
|
||||||
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
|
|
||||||
```
|
|
||||||
|
|
||||||
And now here's the leftmost-first version, which matches how a Perl-like
|
|
||||||
regex will work:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use aho_corasick::{AhoCorasickBuilder, MatchKind};
|
|
||||||
|
|
||||||
let patterns = &["Samwise", "Sam"];
|
|
||||||
let haystack = "Samwise";
|
|
||||||
|
|
||||||
let ac = AhoCorasickBuilder::new()
|
|
||||||
.match_kind(MatchKind::LeftmostFirst)
|
|
||||||
.build(patterns);
|
|
||||||
let mat = ac.find(haystack).expect("should have a match");
|
|
||||||
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
|
|
||||||
```
|
|
||||||
|
|
||||||
In addition to leftmost-first semantics, this library also supports
|
|
||||||
leftmost-longest semantics, which match the POSIX behavior of a regular
|
|
||||||
expression alternation. See `MatchKind` in the docs for more details.
|
|
||||||
|
|
||||||
|
|
||||||
### Minimum Rust version policy
|
|
||||||
|
|
||||||
This crate's minimum supported `rustc` version is `1.28.0`.
|
|
||||||
|
|
||||||
The current policy is that the minimum Rust version required to use this crate
|
|
||||||
can be increased in minor version updates. For example, if `crate 1.0` requires
|
|
||||||
Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust
|
|
||||||
1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum
|
|
||||||
version of Rust.
|
|
||||||
|
|
||||||
In general, this crate will be conservative with respect to the minimum
|
|
||||||
supported version of Rust.
|
|
||||||
|
|
||||||
|
|
||||||
### Future work
|
|
||||||
|
|
||||||
Here are some plans for the future:
|
|
||||||
|
|
||||||
* Assuming the current API is sufficient, I'd like to commit to it and release
|
|
||||||
a `1.0` version of this crate some time in the next 6-12 months.
|
|
||||||
* Support stream searching with leftmost match semantics. Currently, only
|
|
||||||
standard match semantics are supported. Getting this right seems possible,
|
|
||||||
but is tricky since the match state needs to be propagated through multiple
|
|
||||||
searches. (With standard semantics, as soon as a match is seen the search
|
|
||||||
ends.)
|
|
|
@ -1,130 +0,0 @@
|
||||||
use std::cmp;
|
|
||||||
use std::io;
|
|
||||||
use std::ptr;
|
|
||||||
|
|
||||||
/// The default buffer capacity that we use for the stream buffer.
|
|
||||||
const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB
|
|
||||||
|
|
||||||
/// A fairly simple roll buffer for supporting stream searches.
|
|
||||||
///
|
|
||||||
/// This buffer acts as a temporary place to store a fixed amount of data when
|
|
||||||
/// reading from a stream. Its central purpose is to allow "rolling" some
|
|
||||||
/// suffix of the data to the beginning of the buffer before refilling it with
|
|
||||||
/// more data from the stream. For example, let's say we are trying to match
|
|
||||||
/// "foobar" on a stream. When we report the match, we'd like to not only
|
|
||||||
/// report the correct offsets at which the match occurs, but also the matching
|
|
||||||
/// bytes themselves. So let's say our stream is a file with the following
|
|
||||||
/// contents: `test test foobar test test`. Now assume that we happen to read
|
|
||||||
/// the aforementioned file in two chunks: `test test foo` and `bar test test`.
|
|
||||||
/// Naively, it would not be possible to report a single contiguous `foobar`
|
|
||||||
/// match, but this roll buffer allows us to do that. Namely, after the second
|
|
||||||
/// read, the contents of the buffer should be `st foobar test test`, where the
|
|
||||||
/// search should ultimately resume immediately after `foo`. (The prefix `st `
|
|
||||||
/// is included because the roll buffer saves N bytes at the end of the buffer,
|
|
||||||
/// where N is the maximum possible length of a match.)
|
|
||||||
///
|
|
||||||
/// A lot of the logic for dealing with this is unfortunately split out between
|
|
||||||
/// this roll buffer and the `StreamChunkIter`.
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Buffer {
|
|
||||||
/// The raw buffer contents. This has a fixed size and never increases.
|
|
||||||
buf: Vec<u8>,
|
|
||||||
/// The minimum size of the buffer, which is equivalent to the maximum
|
|
||||||
/// possible length of a match. This corresponds to the amount that we
|
|
||||||
/// roll
|
|
||||||
min: usize,
|
|
||||||
/// The end of the contents of this buffer.
|
|
||||||
end: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Buffer {
|
|
||||||
/// Create a new buffer for stream searching. The minimum buffer length
|
|
||||||
/// given should be the size of the maximum possible match length.
|
|
||||||
pub fn new(min_buffer_len: usize) -> Buffer {
|
|
||||||
let min = cmp::max(1, min_buffer_len);
|
|
||||||
// The minimum buffer amount is also the amount that we roll our
|
|
||||||
// buffer in order to support incremental searching. To this end,
|
|
||||||
// our actual capacity needs to be at least 1 byte bigger than our
|
|
||||||
// minimum amount, otherwise we won't have any overlap. In actuality,
|
|
||||||
// we want our buffer to be a bit bigger than that for performance
|
|
||||||
// reasons, so we set a lower bound of `8 * min`.
|
|
||||||
//
|
|
||||||
// TODO: It would be good to find a way to test the streaming
|
|
||||||
// implementation with the minimal buffer size.
|
|
||||||
let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
|
|
||||||
Buffer { buf: vec![0; capacity], min, end: 0 }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return the contents of this buffer.
|
|
||||||
#[inline]
|
|
||||||
pub fn buffer(&self) -> &[u8] {
|
|
||||||
&self.buf[..self.end]
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return the minimum size of the buffer. The only way a buffer may be
|
|
||||||
/// smaller than this is if the stream itself contains less than the
|
|
||||||
/// minimum buffer amount.
|
|
||||||
#[inline]
|
|
||||||
pub fn min_buffer_len(&self) -> usize {
|
|
||||||
self.min
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return the total length of the contents in the buffer.
|
|
||||||
#[inline]
|
|
||||||
pub fn len(&self) -> usize {
|
|
||||||
self.end
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return all free capacity in this buffer.
|
|
||||||
fn free_buffer(&mut self) -> &mut [u8] {
|
|
||||||
&mut self.buf[self.end..]
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Refill the contents of this buffer by reading as much as possible into
|
|
||||||
/// this buffer's free capacity. If no more bytes could be read, then this
|
|
||||||
/// returns false. Otherwise, this reads until it has filled the buffer
|
|
||||||
/// past the minimum amount.
|
|
||||||
pub fn fill<R: io::Read>(&mut self, mut rdr: R) -> io::Result<bool> {
|
|
||||||
let mut readany = false;
|
|
||||||
loop {
|
|
||||||
let readlen = rdr.read(self.free_buffer())?;
|
|
||||||
if readlen == 0 {
|
|
||||||
return Ok(readany);
|
|
||||||
}
|
|
||||||
readany = true;
|
|
||||||
self.end += readlen;
|
|
||||||
if self.len() >= self.min {
|
|
||||||
return Ok(true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Roll the contents of the buffer so that the suffix of this buffer is
|
|
||||||
/// moved to the front and all other contents are dropped. The size of the
|
|
||||||
/// suffix corresponds precisely to the minimum buffer length.
|
|
||||||
///
|
|
||||||
/// This should only be called when the entire contents of this buffer have
|
|
||||||
/// been searched.
|
|
||||||
pub fn roll(&mut self) {
|
|
||||||
let roll_start = self
|
|
||||||
.end
|
|
||||||
.checked_sub(self.min)
|
|
||||||
.expect("buffer capacity should be bigger than minimum amount");
|
|
||||||
let roll_len = self.min;
|
|
||||||
|
|
||||||
assert!(roll_start + roll_len <= self.end);
|
|
||||||
unsafe {
|
|
||||||
// SAFETY: A buffer contains Copy data, so there's no problem
|
|
||||||
// moving it around. Safety also depends on our indices being in
|
|
||||||
// bounds, which they always should be, given the assert above.
|
|
||||||
//
|
|
||||||
// TODO: Switch to [T]::copy_within once our MSRV is high enough.
|
|
||||||
ptr::copy(
|
|
||||||
self.buf[roll_start..].as_ptr(),
|
|
||||||
self.buf.as_mut_ptr(),
|
|
||||||
roll_len,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
self.end = roll_len;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,709 +0,0 @@
|
||||||
use std::mem::size_of;
|
|
||||||
|
|
||||||
use ahocorasick::MatchKind;
|
|
||||||
use automaton::Automaton;
|
|
||||||
use classes::ByteClasses;
|
|
||||||
use error::Result;
|
|
||||||
use nfa::{PatternID, PatternLength, NFA};
|
|
||||||
use prefilter::{Prefilter, PrefilterObj, PrefilterState};
|
|
||||||
use state_id::{dead_id, fail_id, premultiply_overflow_error, StateID};
|
|
||||||
use Match;
|
|
||||||
|
|
||||||
/// A DFA implementation of Aho-Corasick.
///
/// Each variant fixes one combination of two layout choices: whether
/// transition rows are indexed by byte equivalence classes, and whether
/// state identifiers are premultiplied by the row width (so an id is also
/// its row's offset into the transition table).
#[derive(Clone, Debug)]
pub enum DFA<S> {
    /// Full 256-entry transition rows, unmultiplied state ids.
    Standard(Standard<S>),
    /// Rows indexed by byte equivalence class, unmultiplied state ids.
    ByteClass(ByteClass<S>),
    /// Full 256-entry rows, premultiplied state ids.
    Premultiplied(Premultiplied<S>),
    /// Byte-class rows, premultiplied state ids.
    PremultipliedByteClass(PremultipliedByteClass<S>),
}
|
|
||||||
|
|
||||||
impl<S: StateID> DFA<S> {
|
|
||||||
fn repr(&self) -> &Repr<S> {
|
|
||||||
match *self {
|
|
||||||
DFA::Standard(ref dfa) => dfa.repr(),
|
|
||||||
DFA::ByteClass(ref dfa) => dfa.repr(),
|
|
||||||
DFA::Premultiplied(ref dfa) => dfa.repr(),
|
|
||||||
DFA::PremultipliedByteClass(ref dfa) => dfa.repr(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn match_kind(&self) -> &MatchKind {
|
|
||||||
&self.repr().match_kind
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn heap_bytes(&self) -> usize {
|
|
||||||
self.repr().heap_bytes
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn max_pattern_len(&self) -> usize {
|
|
||||||
self.repr().max_pattern_len
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn pattern_count(&self) -> usize {
|
|
||||||
self.repr().pattern_count
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn start_state(&self) -> S {
|
|
||||||
self.repr().start_id
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
pub fn overlapping_find_at(
|
|
||||||
&self,
|
|
||||||
prestate: &mut PrefilterState,
|
|
||||||
haystack: &[u8],
|
|
||||||
at: usize,
|
|
||||||
state_id: &mut S,
|
|
||||||
match_index: &mut usize,
|
|
||||||
) -> Option<Match> {
|
|
||||||
match *self {
|
|
||||||
DFA::Standard(ref dfa) => dfa.overlapping_find_at(
|
|
||||||
prestate,
|
|
||||||
haystack,
|
|
||||||
at,
|
|
||||||
state_id,
|
|
||||||
match_index,
|
|
||||||
),
|
|
||||||
DFA::ByteClass(ref dfa) => dfa.overlapping_find_at(
|
|
||||||
prestate,
|
|
||||||
haystack,
|
|
||||||
at,
|
|
||||||
state_id,
|
|
||||||
match_index,
|
|
||||||
),
|
|
||||||
DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at(
|
|
||||||
prestate,
|
|
||||||
haystack,
|
|
||||||
at,
|
|
||||||
state_id,
|
|
||||||
match_index,
|
|
||||||
),
|
|
||||||
DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at(
|
|
||||||
prestate,
|
|
||||||
haystack,
|
|
||||||
at,
|
|
||||||
state_id,
|
|
||||||
match_index,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
pub fn earliest_find_at(
|
|
||||||
&self,
|
|
||||||
prestate: &mut PrefilterState,
|
|
||||||
haystack: &[u8],
|
|
||||||
at: usize,
|
|
||||||
state_id: &mut S,
|
|
||||||
) -> Option<Match> {
|
|
||||||
match *self {
|
|
||||||
DFA::Standard(ref dfa) => {
|
|
||||||
dfa.earliest_find_at(prestate, haystack, at, state_id)
|
|
||||||
}
|
|
||||||
DFA::ByteClass(ref dfa) => {
|
|
||||||
dfa.earliest_find_at(prestate, haystack, at, state_id)
|
|
||||||
}
|
|
||||||
DFA::Premultiplied(ref dfa) => {
|
|
||||||
dfa.earliest_find_at(prestate, haystack, at, state_id)
|
|
||||||
}
|
|
||||||
DFA::PremultipliedByteClass(ref dfa) => {
|
|
||||||
dfa.earliest_find_at(prestate, haystack, at, state_id)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
pub fn find_at_no_state(
|
|
||||||
&self,
|
|
||||||
prestate: &mut PrefilterState,
|
|
||||||
haystack: &[u8],
|
|
||||||
at: usize,
|
|
||||||
) -> Option<Match> {
|
|
||||||
match *self {
|
|
||||||
DFA::Standard(ref dfa) => {
|
|
||||||
dfa.find_at_no_state(prestate, haystack, at)
|
|
||||||
}
|
|
||||||
DFA::ByteClass(ref dfa) => {
|
|
||||||
dfa.find_at_no_state(prestate, haystack, at)
|
|
||||||
}
|
|
||||||
DFA::Premultiplied(ref dfa) => {
|
|
||||||
dfa.find_at_no_state(prestate, haystack, at)
|
|
||||||
}
|
|
||||||
DFA::PremultipliedByteClass(ref dfa) => {
|
|
||||||
dfa.find_at_no_state(prestate, haystack, at)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A DFA with full 256-entry transition rows and unmultiplied state ids.
#[derive(Clone, Debug)]
pub struct Standard<S>(Repr<S>);

impl<S: StateID> Standard<S> {
    /// Return the underlying DFA representation.
    fn repr(&self) -> &Repr<S> {
        &self.0
    }
}
|
|
||||||
|
|
||||||
// The `Automaton` methods below are thin delegations to `Repr`; only
// `is_valid` and `next_state` encode this variant's layout (unmultiplied
// state ids, 256-entry rows).
impl<S: StateID> Automaton for Standard<S> {
    type ID = S;

    fn match_kind(&self) -> &MatchKind {
        &self.repr().match_kind
    }

    fn anchored(&self) -> bool {
        self.repr().anchored
    }

    fn prefilter(&self) -> Option<&dyn Prefilter> {
        self.repr().prefilter.as_ref().map(|p| p.as_ref())
    }

    fn start_state(&self) -> S {
        self.repr().start_id
    }

    fn is_valid(&self, id: S) -> bool {
        // Unmultiplied ids are plain state indices.
        id.to_usize() < self.repr().state_count
    }

    fn is_match_state(&self, id: S) -> bool {
        self.repr().is_match_state(id)
    }

    fn is_match_or_dead_state(&self, id: S) -> bool {
        self.repr().is_match_or_dead_state(id)
    }

    fn get_match(
        &self,
        id: S,
        match_index: usize,
        end: usize,
    ) -> Option<Match> {
        self.repr().get_match(id, match_index, end)
    }

    fn match_count(&self, id: S) -> usize {
        self.repr().match_count(id)
    }

    fn next_state(&self, current: S, input: u8) -> S {
        // Each state's row covers the full byte alphabet, so the row for
        // `current` starts at `current * 256`.
        let o = current.to_usize() * 256 + input as usize;
        self.repr().trans[o]
    }
}
|
|
||||||
|
|
||||||
/// A DFA whose transition rows are indexed by byte equivalence class, with
/// unmultiplied state ids.
#[derive(Clone, Debug)]
pub struct ByteClass<S>(Repr<S>);

impl<S: StateID> ByteClass<S> {
    /// Return the underlying DFA representation.
    fn repr(&self) -> &Repr<S> {
        &self.0
    }
}
|
|
||||||
|
|
||||||
// Thin delegations to `Repr`, except `next_state`, which maps the input
// byte through the equivalence classes before indexing the (narrower)
// transition row.
impl<S: StateID> Automaton for ByteClass<S> {
    type ID = S;

    fn match_kind(&self) -> &MatchKind {
        &self.repr().match_kind
    }

    fn anchored(&self) -> bool {
        self.repr().anchored
    }

    fn prefilter(&self) -> Option<&dyn Prefilter> {
        self.repr().prefilter.as_ref().map(|p| p.as_ref())
    }

    fn start_state(&self) -> S {
        self.repr().start_id
    }

    fn is_valid(&self, id: S) -> bool {
        // Unmultiplied ids are plain state indices.
        id.to_usize() < self.repr().state_count
    }

    fn is_match_state(&self, id: S) -> bool {
        self.repr().is_match_state(id)
    }

    fn is_match_or_dead_state(&self, id: S) -> bool {
        self.repr().is_match_or_dead_state(id)
    }

    fn get_match(
        &self,
        id: S,
        match_index: usize,
        end: usize,
    ) -> Option<Match> {
        self.repr().get_match(id, match_index, end)
    }

    fn match_count(&self, id: S) -> usize {
        self.repr().match_count(id)
    }

    fn next_state(&self, current: S, input: u8) -> S {
        // Rows are `alphabet_len` wide; translate the raw input byte to
        // its equivalence class before indexing.
        let alphabet_len = self.repr().byte_classes.alphabet_len();
        let input = self.repr().byte_classes.get(input);
        let o = current.to_usize() * alphabet_len + input as usize;
        self.repr().trans[o]
    }
}
|
|
||||||
|
|
||||||
/// A DFA with full 256-entry rows whose state ids are premultiplied, i.e.,
/// each id is also the offset of its transition row in the table.
#[derive(Clone, Debug)]
pub struct Premultiplied<S>(Repr<S>);

impl<S: StateID> Premultiplied<S> {
    /// Return the underlying DFA representation.
    fn repr(&self) -> &Repr<S> {
        &self.0
    }
}
|
|
||||||
|
|
||||||
// With premultiplied ids, a state id is its row offset (id = index * 256),
// so `next_state` is a single addition, while anything that needs the
// plain state index (`is_valid`, `get_match`, `match_count`) divides by
// the row width.
impl<S: StateID> Automaton for Premultiplied<S> {
    type ID = S;

    fn match_kind(&self) -> &MatchKind {
        &self.repr().match_kind
    }

    fn anchored(&self) -> bool {
        self.repr().anchored
    }

    fn prefilter(&self) -> Option<&dyn Prefilter> {
        self.repr().prefilter.as_ref().map(|p| p.as_ref())
    }

    fn start_state(&self) -> S {
        self.repr().start_id
    }

    fn is_valid(&self, id: S) -> bool {
        // Recover the plain state index from the premultiplied id.
        (id.to_usize() / 256) < self.repr().state_count
    }

    fn is_match_state(&self, id: S) -> bool {
        self.repr().is_match_state(id)
    }

    fn is_match_or_dead_state(&self, id: S) -> bool {
        self.repr().is_match_or_dead_state(id)
    }

    fn get_match(
        &self,
        id: S,
        match_index: usize,
        end: usize,
    ) -> Option<Match> {
        if id > self.repr().max_match {
            return None;
        }
        // `matches` is indexed by plain state index, not premultiplied id.
        self.repr()
            .matches
            .get(id.to_usize() / 256)
            .and_then(|m| m.get(match_index))
            .map(|&(id, len)| Match { pattern: id, len, end })
    }

    fn match_count(&self, id: S) -> usize {
        let o = id.to_usize() / 256;
        self.repr().matches[o].len()
    }

    fn next_state(&self, current: S, input: u8) -> S {
        // The id already encodes the row offset, so no multiply is needed.
        let o = current.to_usize() + input as usize;
        self.repr().trans[o]
    }
}
|
|
||||||
|
|
||||||
/// A DFA combining both space/time optimizations: transition rows indexed
/// by byte equivalence class AND premultiplied state ids.
#[derive(Clone, Debug)]
pub struct PremultipliedByteClass<S>(Repr<S>);

impl<S: StateID> PremultipliedByteClass<S> {
    /// Return the underlying DFA representation.
    fn repr(&self) -> &Repr<S> {
        &self.0
    }
}
|
|
||||||
|
|
||||||
// Same premultiplied layout as `Premultiplied`, but rows are only
// `alphabet_len()` wide, so index recovery divides by the class count and
// `next_state` first maps the byte through the equivalence classes.
impl<S: StateID> Automaton for PremultipliedByteClass<S> {
    type ID = S;

    fn match_kind(&self) -> &MatchKind {
        &self.repr().match_kind
    }

    fn anchored(&self) -> bool {
        self.repr().anchored
    }

    fn prefilter(&self) -> Option<&dyn Prefilter> {
        self.repr().prefilter.as_ref().map(|p| p.as_ref())
    }

    fn start_state(&self) -> S {
        self.repr().start_id
    }

    fn is_valid(&self, id: S) -> bool {
        // Recover the plain state index from the premultiplied id.
        (id.to_usize() / self.repr().alphabet_len()) < self.repr().state_count
    }

    fn is_match_state(&self, id: S) -> bool {
        self.repr().is_match_state(id)
    }

    fn is_match_or_dead_state(&self, id: S) -> bool {
        self.repr().is_match_or_dead_state(id)
    }

    fn get_match(
        &self,
        id: S,
        match_index: usize,
        end: usize,
    ) -> Option<Match> {
        if id > self.repr().max_match {
            return None;
        }
        // `matches` is indexed by plain state index, not premultiplied id.
        self.repr()
            .matches
            .get(id.to_usize() / self.repr().alphabet_len())
            .and_then(|m| m.get(match_index))
            .map(|&(id, len)| Match { pattern: id, len, end })
    }

    fn match_count(&self, id: S) -> usize {
        let o = id.to_usize() / self.repr().alphabet_len();
        self.repr().matches[o].len()
    }

    fn next_state(&self, current: S, input: u8) -> S {
        // Translate to the equivalence class, then add: the id already
        // encodes the row offset.
        let input = self.repr().byte_classes.get(input);
        let o = current.to_usize() + input as usize;
        self.repr().trans[o]
    }
}
|
|
||||||
|
|
||||||
/// The underlying representation shared by every DFA variant.
#[derive(Clone, Debug)]
pub struct Repr<S> {
    /// The match semantics of this automaton.
    match_kind: MatchKind,
    /// Whether searches are anchored; copied from the source NFA.
    anchored: bool,
    /// Whether state identifiers have been premultiplied by the row width,
    /// making each id double as the offset of its transition row.
    premultiplied: bool,
    /// The identifier of the start state.
    start_id: S,
    /// The length, in bytes, of the longest pattern in this automaton. This
    /// information is useful for keeping correct buffer sizes when searching
    /// on streams.
    max_pattern_len: usize,
    /// The total number of patterns added to this automaton. This includes
    /// patterns that may never match.
    pattern_count: usize,
    /// The total number of states in this DFA.
    state_count: usize,
    /// The maximum state identifier that is a match state; ids above this
    /// never correspond to a match.
    max_match: S,
    /// The number of bytes of heap used by this NFA's transition table.
    heap_bytes: usize,
    /// A prefilter for quickly detecting candidate matches, if pertinent.
    prefilter: Option<PrefilterObj>,
    /// The mapping from input bytes to byte equivalence classes.
    byte_classes: ByteClasses,
    /// The transition table, one row of `alphabet_len()` entries per state.
    trans: Vec<S>,
    /// For each state (by plain index), the (pattern id, pattern length)
    /// pairs reported at that state.
    matches: Vec<Vec<(PatternID, PatternLength)>>,
}
|
|
||||||
|
|
||||||
impl<S: StateID> Repr<S> {
    /// Returns the total alphabet size for this DFA.
    ///
    /// If byte classes are enabled, then this corresponds to the number of
    /// equivalence classes. If they are disabled, then this is always 256.
    fn alphabet_len(&self) -> usize {
        self.byte_classes.alphabet_len()
    }

    /// Returns true only if the given state is a match state.
    fn is_match_state(&self, id: S) -> bool {
        // Match states are shuffled to the low end of the id space (see
        // `shuffle_match_states`), so a range check suffices; the dead
        // state is excluded explicitly.
        id <= self.max_match && id > dead_id()
    }

    /// Returns true only if the given state is either a dead state or a match
    /// state.
    fn is_match_or_dead_state(&self, id: S) -> bool {
        id <= self.max_match
    }

    /// Get the ith match for the given state, where the end position of a
    /// match was found at `end`.
    ///
    /// # Panics
    ///
    /// The caller must ensure that the given state identifier is valid,
    /// otherwise this may panic. The `match_index` need not be valid. That is,
    /// if the given state has no matches then this returns `None`.
    fn get_match(
        &self,
        id: S,
        match_index: usize,
        end: usize,
    ) -> Option<Match> {
        if id > self.max_match {
            return None;
        }
        self.matches
            .get(id.to_usize())
            .and_then(|m| m.get(match_index))
            .map(|&(id, len)| Match { pattern: id, len, end })
    }

    /// Return the total number of matches for the given state.
    ///
    /// # Panics
    ///
    /// The caller must ensure that the given identifier is valid, or else
    /// this panics.
    fn match_count(&self, id: S) -> usize {
        self.matches[id.to_usize()].len()
    }

    /// Get the next state given `from` as the current state and `byte` as the
    /// current input byte.
    fn next_state(&self, from: S, byte: u8) -> S {
        let alphabet_len = self.alphabet_len();
        let byte = self.byte_classes.get(byte);
        self.trans[from.to_usize() * alphabet_len + byte as usize]
    }

    /// Set the `byte` transition for the `from` state to point to `to`.
    fn set_next_state(&mut self, from: S, byte: u8, to: S) {
        let alphabet_len = self.alphabet_len();
        let byte = self.byte_classes.get(byte);
        self.trans[from.to_usize() * alphabet_len + byte as usize] = to;
    }

    /// Swap the given states in place, exchanging both their transition
    /// rows and their match lists. Transitions pointing *at* the swapped
    /// states are not rewritten here; the caller is responsible for that.
    fn swap_states(&mut self, id1: S, id2: S) {
        assert!(!self.premultiplied, "can't swap states in premultiplied DFA");

        let o1 = id1.to_usize() * self.alphabet_len();
        let o2 = id2.to_usize() * self.alphabet_len();
        for b in 0..self.alphabet_len() {
            self.trans.swap(o1 + b, o2 + b);
        }
        self.matches.swap(id1.to_usize(), id2.to_usize());
    }

    /// This routine shuffles all match states in this DFA to the beginning
    /// of the DFA such that every non-match state appears after every match
    /// state. (With one exception: the special fail and dead states remain as
    /// the first two states.)
    ///
    /// The purpose of doing this shuffling is to avoid an extra conditional
    /// in the search loop, and in particular, detecting whether a state is a
    /// match or not does not need to access any memory.
    ///
    /// This updates `self.max_match` to point to the last matching state as
    /// well as `self.start` if the starting state was moved.
    fn shuffle_match_states(&mut self) {
        assert!(
            !self.premultiplied,
            "cannot shuffle match states of premultiplied DFA"
        );

        if self.state_count <= 1 {
            return;
        }

        // Advance to the first state (from the start state on) that has no
        // matches; everything before it is already in its final place.
        let mut first_non_match = self.start_id.to_usize();
        while first_non_match < self.state_count
            && self.matches[first_non_match].len() > 0
        {
            first_non_match += 1;
        }

        // `swaps[old] == new` records where each moved state went, so that
        // transitions can be rewritten below. `fail_id()` is the "not
        // moved" sentinel.
        let mut swaps: Vec<S> = vec![fail_id(); self.state_count];
        let mut cur = self.state_count - 1;
        // Walk match states down from the top, swapping each with the
        // lowest non-match slot, two-pointer style.
        while cur > first_non_match {
            if self.matches[cur].len() > 0 {
                self.swap_states(
                    S::from_usize(cur),
                    S::from_usize(first_non_match),
                );
                swaps[cur] = S::from_usize(first_non_match);
                swaps[first_non_match] = S::from_usize(cur);

                first_non_match += 1;
                while first_non_match < cur
                    && self.matches[first_non_match].len() > 0
                {
                    first_non_match += 1;
                }
            }
            cur -= 1;
        }
        // Rewrite every transition that targeted a moved state.
        for id in (0..self.state_count).map(S::from_usize) {
            let alphabet_len = self.alphabet_len();
            let offset = id.to_usize() * alphabet_len;
            for next in &mut self.trans[offset..offset + alphabet_len] {
                if swaps[next.to_usize()] != fail_id() {
                    *next = swaps[next.to_usize()];
                }
            }
        }
        if swaps[self.start_id.to_usize()] != fail_id() {
            self.start_id = swaps[self.start_id.to_usize()];
        }
        // All match states now occupy [.., first_non_match).
        self.max_match = S::from_usize(first_non_match - 1);
    }

    /// Premultiply every state identifier by the alphabet length, so that
    /// an id doubles as its transition-row offset. Returns an error if the
    /// multiplied identifiers would overflow the representation size.
    fn premultiply(&mut self) -> Result<()> {
        if self.premultiplied || self.state_count <= 1 {
            return Ok(());
        }

        let alpha_len = self.alphabet_len();
        // Check the largest possible id first so the loop below can't
        // overflow.
        premultiply_overflow_error(
            S::from_usize(self.state_count - 1),
            alpha_len,
        )?;

        // States 0 and 1 are skipped here — presumably the special
        // fail/dead states whose rows are never rewritten; dead-state
        // targets are likewise left as-is (TODO confirm against fail_id /
        // dead_id definitions).
        for id in (2..self.state_count).map(S::from_usize) {
            let offset = id.to_usize() * alpha_len;
            for next in &mut self.trans[offset..offset + alpha_len] {
                if *next == dead_id() {
                    continue;
                }
                *next = S::from_usize(next.to_usize() * alpha_len);
            }
        }
        self.premultiplied = true;
        self.start_id = S::from_usize(self.start_id.to_usize() * alpha_len);
        self.max_match = S::from_usize(self.max_match.to_usize() * alpha_len);
        Ok(())
    }

    /// Computes the total amount of heap used by this NFA in bytes and
    /// stores it in `self.heap_bytes`.
    fn calculate_size(&mut self) {
        // Transition table plus the per-state match Vec headers...
        let mut size = (self.trans.len() * size_of::<S>())
            + (self.matches.len()
                * size_of::<Vec<(PatternID, PatternLength)>>());
        // ...plus the match entries themselves...
        for state_matches in &self.matches {
            size +=
                state_matches.len() * size_of::<(PatternID, PatternLength)>();
        }
        // ...plus whatever the prefilter reports, if one exists.
        size += self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes());
        self.heap_bytes = size;
    }
}
|
|
||||||
|
|
||||||
/// A builder for configuring the determinization of an NFA into a DFA.
#[derive(Clone, Debug)]
pub struct Builder {
    /// Whether to premultiply state identifiers by the alphabet size
    /// (enabled by default).
    premultiply: bool,
    /// Whether to index transition rows by byte equivalence class
    /// (enabled by default).
    byte_classes: bool,
}
|
|
||||||
|
|
||||||
impl Builder {
    /// Create a new builder for a DFA.
    ///
    /// Both premultiplication and byte classes are enabled by default.
    pub fn new() -> Builder {
        Builder { premultiply: true, byte_classes: true }
    }

    /// Build a DFA from the given NFA.
    ///
    /// This returns an error if the state identifiers exceed their
    /// representation size. This can only happen when state ids are
    /// premultiplied (which is enabled by default).
    pub fn build<S: StateID>(&self, nfa: &NFA<S>) -> Result<DFA<S>> {
        let byte_classes = if self.byte_classes {
            nfa.byte_classes().clone()
        } else {
            // Singleton classes give the identity mapping: 256 classes,
            // one per byte.
            ByteClasses::singletons()
        };
        let alphabet_len = byte_classes.alphabet_len();
        // One transition row per NFA state, initially all fail transitions.
        let trans = vec![fail_id(); alphabet_len * nfa.state_len()];
        let matches = vec![vec![]; nfa.state_len()];
        let mut repr = Repr {
            match_kind: nfa.match_kind().clone(),
            anchored: nfa.anchored(),
            premultiplied: false,
            start_id: nfa.start_state(),
            max_pattern_len: nfa.max_pattern_len(),
            pattern_count: nfa.pattern_count(),
            state_count: nfa.state_len(),
            max_match: fail_id(),
            heap_bytes: 0,
            prefilter: nfa.prefilter_obj().map(|p| p.clone()),
            byte_classes: byte_classes.clone(),
            trans,
            matches,
        };
        for id in (0..nfa.state_len()).map(S::from_usize) {
            repr.matches[id.to_usize()].extend_from_slice(nfa.matches(id));

            let fail = nfa.failure_transition(id);
            nfa.iter_all_transitions(&byte_classes, id, |b, mut next| {
                if next == fail_id() {
                    // Resolve failure transitions eagerly so the finished
                    // DFA never follows failure links at search time.
                    next = nfa_next_state_memoized(nfa, &repr, id, fail, b);
                }
                repr.set_next_state(id, b, next);
            });
        }
        // Shuffle before premultiplying: swapping states is only legal on
        // unmultiplied ids (see `Repr::swap_states`).
        repr.shuffle_match_states();
        repr.calculate_size();
        if self.premultiply {
            repr.premultiply()?;
            if byte_classes.is_singleton() {
                Ok(DFA::Premultiplied(Premultiplied(repr)))
            } else {
                Ok(DFA::PremultipliedByteClass(PremultipliedByteClass(repr)))
            }
        } else {
            if byte_classes.is_singleton() {
                Ok(DFA::Standard(Standard(repr)))
            } else {
                Ok(DFA::ByteClass(ByteClass(repr)))
            }
        }
    }

    /// Whether to use byte classes or not in the DFA.
    pub fn byte_classes(&mut self, yes: bool) -> &mut Builder {
        self.byte_classes = yes;
        self
    }

    /// Whether to premultiply state identifiers in the DFA.
    pub fn premultiply(&mut self, yes: bool) -> &mut Builder {
        self.premultiply = yes;
        self
    }
}
|
|
||||||
|
|
||||||
/// This returns the next NFA transition (including resolving failure
|
|
||||||
/// transitions), except once it sees a state id less than the id of the DFA
|
|
||||||
/// state that is currently being populated, then we no longer need to follow
|
|
||||||
/// failure transitions and can instead query the pre-computed state id from
|
|
||||||
/// the DFA itself.
|
|
||||||
///
|
|
||||||
/// In general, this should only be called when a failure transition is seen.
|
|
||||||
fn nfa_next_state_memoized<S: StateID>(
|
|
||||||
nfa: &NFA<S>,
|
|
||||||
dfa: &Repr<S>,
|
|
||||||
populating: S,
|
|
||||||
mut current: S,
|
|
||||||
input: u8,
|
|
||||||
) -> S {
|
|
||||||
loop {
|
|
||||||
if current < populating {
|
|
||||||
return dfa.next_state(current, input);
|
|
||||||
}
|
|
||||||
let next = nfa.next_state(current, input);
|
|
||||||
if next != fail_id() {
|
|
||||||
return next;
|
|
||||||
}
|
|
||||||
current = nfa.failure_transition(current);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,297 +0,0 @@
|
||||||
/*!
|
|
||||||
A library for finding occurrences of many patterns at once. This library
|
|
||||||
provides multiple pattern search principally through an implementation of the
|
|
||||||
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
|
|
||||||
which builds a fast finite state machine for executing searches in linear time.
|
|
||||||
|
|
||||||
Additionally, this library provides a number of configuration options for
|
|
||||||
building the automaton that permit controlling the space versus time trade
|
|
||||||
off. Other features include simple ASCII case insensitive matching, finding
|
|
||||||
overlapping matches, replacements, searching streams and even searching and
|
|
||||||
replacing text in streams.
|
|
||||||
|
|
||||||
Finally, unlike all other (known) Aho-Corasick implementations, this one
|
|
||||||
supports enabling
|
|
||||||
[leftmost-first](enum.MatchKind.html#variant.LeftmostFirst)
|
|
||||||
or
|
|
||||||
[leftmost-longest](enum.MatchKind.html#variant.LeftmostFirst)
|
|
||||||
match semantics, using a (seemingly) novel alternative construction algorithm.
|
|
||||||
For more details on what match semantics means, see the
|
|
||||||
[`MatchKind`](enum.MatchKind.html)
|
|
||||||
type.
|
|
||||||
|
|
||||||
# Overview
|
|
||||||
|
|
||||||
This section gives a brief overview of the primary types in this crate:
|
|
||||||
|
|
||||||
* [`AhoCorasick`](struct.AhoCorasick.html) is the primary type and represents
|
|
||||||
an Aho-Corasick automaton. This is the type you use to execute searches.
|
|
||||||
* [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) can be used to build
|
|
||||||
an Aho-Corasick automaton, and supports configuring a number of options.
|
|
||||||
* [`Match`](struct.Match.html) represents a single match reported by an
|
|
||||||
Aho-Corasick automaton. Each match has two pieces of information: the pattern
|
|
||||||
that matched and the start and end byte offsets corresponding to the position
|
|
||||||
in the haystack at which it matched.
|
|
||||||
|
|
||||||
Additionally, the [`packed`](packed/index.html) sub-module contains a lower
|
|
||||||
level API for using fast vectorized routines for finding a small number of
|
|
||||||
patterns in a haystack.
|
|
||||||
|
|
||||||
# Example: basic searching
|
|
||||||
|
|
||||||
This example shows how to search for occurrences of multiple patterns
|
|
||||||
simultaneously. Each match includes the pattern that matched along with the
|
|
||||||
byte offsets of the match.
|
|
||||||
|
|
||||||
```
|
|
||||||
use aho_corasick::AhoCorasick;
|
|
||||||
|
|
||||||
let patterns = &["apple", "maple", "Snapple"];
|
|
||||||
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
|
||||||
|
|
||||||
let ac = AhoCorasick::new(patterns);
|
|
||||||
let mut matches = vec![];
|
|
||||||
for mat in ac.find_iter(haystack) {
|
|
||||||
matches.push((mat.pattern(), mat.start(), mat.end()));
|
|
||||||
}
|
|
||||||
assert_eq!(matches, vec![
|
|
||||||
(1, 13, 18),
|
|
||||||
(0, 28, 33),
|
|
||||||
(2, 43, 50),
|
|
||||||
]);
|
|
||||||
```
|
|
||||||
|
|
||||||
# Example: case insensitivity
|
|
||||||
|
|
||||||
This is like the previous example, but matches `Snapple` case insensitively
|
|
||||||
using `AhoCorasickBuilder`:
|
|
||||||
|
|
||||||
```
|
|
||||||
use aho_corasick::AhoCorasickBuilder;
|
|
||||||
|
|
||||||
let patterns = &["apple", "maple", "snapple"];
|
|
||||||
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
|
||||||
|
|
||||||
let ac = AhoCorasickBuilder::new()
|
|
||||||
.ascii_case_insensitive(true)
|
|
||||||
.build(patterns);
|
|
||||||
let mut matches = vec![];
|
|
||||||
for mat in ac.find_iter(haystack) {
|
|
||||||
matches.push((mat.pattern(), mat.start(), mat.end()));
|
|
||||||
}
|
|
||||||
assert_eq!(matches, vec![
|
|
||||||
(1, 13, 18),
|
|
||||||
(0, 28, 33),
|
|
||||||
(2, 43, 50),
|
|
||||||
]);
|
|
||||||
```
|
|
||||||
|
|
||||||
# Example: replacing matches in a stream
|
|
||||||
|
|
||||||
This example shows how to execute a search and replace on a stream without
|
|
||||||
loading the entire stream into memory first.
|
|
||||||
|
|
||||||
```
|
|
||||||
use aho_corasick::AhoCorasick;
|
|
||||||
|
|
||||||
# fn example() -> Result<(), ::std::io::Error> {
|
|
||||||
let patterns = &["fox", "brown", "quick"];
|
|
||||||
let replace_with = &["sloth", "grey", "slow"];
|
|
||||||
|
|
||||||
// In a real example, these might be `std::fs::File`s instead. All you need to
|
|
||||||
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
|
|
||||||
let rdr = "The quick brown fox.";
|
|
||||||
let mut wtr = vec![];
|
|
||||||
|
|
||||||
let ac = AhoCorasick::new(patterns);
|
|
||||||
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
|
|
||||||
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
|
|
||||||
# Ok(()) }; example().unwrap()
|
|
||||||
```
|
|
||||||
|
|
||||||
# Example: finding the leftmost first match
|
|
||||||
|
|
||||||
In the textbook description of Aho-Corasick, its formulation is typically
|
|
||||||
structured such that it reports all possible matches, even when they overlap
|
|
||||||
with another. In many cases, overlapping matches may not be desired, such as
|
|
||||||
the case of finding all successive non-overlapping matches like you might with
|
|
||||||
a standard regular expression.
|
|
||||||
|
|
||||||
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
|
|
||||||
this doesn't always work in the expected way, since it will report matches as
|
|
||||||
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
|
|
||||||
against the text `Samwise`. Most regex engines (that are Perl-like, or
|
|
||||||
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
|
|
||||||
algorithm modified for reporting non-overlapping matches will report `Sam`.
|
|
||||||
|
|
||||||
A novel contribution of this library is the ability to change the match
|
|
||||||
semantics of Aho-Corasick (without additional search time overhead) such that
|
|
||||||
`Samwise` is reported instead. For example, here's the standard approach:
|
|
||||||
|
|
||||||
```
|
|
||||||
use aho_corasick::AhoCorasick;
|
|
||||||
|
|
||||||
let patterns = &["Samwise", "Sam"];
|
|
||||||
let haystack = "Samwise";
|
|
||||||
|
|
||||||
let ac = AhoCorasick::new(patterns);
|
|
||||||
let mat = ac.find(haystack).expect("should have a match");
|
|
||||||
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
|
|
||||||
```
|
|
||||||
|
|
||||||
And now here's the leftmost-first version, which matches how a Perl-like
|
|
||||||
regex will work:
|
|
||||||
|
|
||||||
```
|
|
||||||
use aho_corasick::{AhoCorasickBuilder, MatchKind};
|
|
||||||
|
|
||||||
let patterns = &["Samwise", "Sam"];
|
|
||||||
let haystack = "Samwise";
|
|
||||||
|
|
||||||
let ac = AhoCorasickBuilder::new()
|
|
||||||
.match_kind(MatchKind::LeftmostFirst)
|
|
||||||
.build(patterns);
|
|
||||||
let mat = ac.find(haystack).expect("should have a match");
|
|
||||||
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
|
|
||||||
```
|
|
||||||
|
|
||||||
In addition to leftmost-first semantics, this library also supports
|
|
||||||
leftmost-longest semantics, which match the POSIX behavior of a regular
|
|
||||||
expression alternation. See
|
|
||||||
[`MatchKind`](enum.MatchKind.html)
|
|
||||||
for more details.
|
|
||||||
|
|
||||||
# Prefilters
|
|
||||||
|
|
||||||
While an Aho-Corasick automaton can perform admirably when compared to more
|
|
||||||
naive solutions, it is generally slower than more specialized algorithms that
|
|
||||||
are accelerated using vector instructions such as SIMD.
|
|
||||||
|
|
||||||
For that reason, this library will internally use a "prefilter" to attempt
|
|
||||||
to accelerate searches when possible. Currently, this library has fairly
|
|
||||||
limited implementation that only applies when there are 3 or fewer unique
|
|
||||||
starting bytes among all patterns in an automaton.
|
|
||||||
|
|
||||||
While a prefilter is generally good to have on by default since it works well
|
|
||||||
in the common case, it can lead to less predictable or even sub-optimal
|
|
||||||
performance in some cases. For that reason, prefilters can be disabled via
|
|
||||||
[`AhoCorasickBuilder::prefilter`](struct.AhoCorasickBuilder.html#method.prefilter).
|
|
||||||
*/
|
|
||||||
|
|
||||||
#![deny(missing_docs)]
|
|
||||||
|
|
||||||
// We can never be truly no_std, but we could be alloc-only some day, so
|
|
||||||
// require the std feature for now.
|
|
||||||
#[cfg(not(feature = "std"))]
|
|
||||||
compile_error!("`std` feature is currently required to build this crate");
|
|
||||||
|
|
||||||
extern crate memchr;
|
|
||||||
#[cfg(test)]
|
|
||||||
#[macro_use]
|
|
||||||
extern crate doc_comment;
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
doctest!("../README.md");
|
|
||||||
|
|
||||||
pub use ahocorasick::{
|
|
||||||
AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind,
|
|
||||||
StreamFindIter,
|
|
||||||
};
|
|
||||||
pub use error::{Error, ErrorKind};
|
|
||||||
pub use state_id::StateID;
|
|
||||||
|
|
||||||
mod ahocorasick;
|
|
||||||
mod automaton;
|
|
||||||
mod buffer;
|
|
||||||
mod byte_frequencies;
|
|
||||||
mod classes;
|
|
||||||
mod dfa;
|
|
||||||
mod error;
|
|
||||||
mod nfa;
|
|
||||||
pub mod packed;
|
|
||||||
mod prefilter;
|
|
||||||
mod state_id;
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests;
|
|
||||||
|
|
||||||
/// A representation of a match reported by an Aho-Corasick automaton.
|
|
||||||
///
|
|
||||||
/// A match has two essential pieces of information: the identifier of the
|
|
||||||
/// pattern that matched, along with the start and end offsets of the match
|
|
||||||
/// in the haystack.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// Basic usage:
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use aho_corasick::AhoCorasick;
|
|
||||||
///
|
|
||||||
/// let ac = AhoCorasick::new(&[
|
|
||||||
/// "foo", "bar", "baz",
|
|
||||||
/// ]);
|
|
||||||
/// let mat = ac.find("xxx bar xxx").expect("should have a match");
|
|
||||||
/// assert_eq!(1, mat.pattern());
|
|
||||||
/// assert_eq!(4, mat.start());
|
|
||||||
/// assert_eq!(7, mat.end());
|
|
||||||
/// ```
|
|
||||||
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
|
|
||||||
pub struct Match {
|
|
||||||
/// The pattern id.
|
|
||||||
pattern: usize,
|
|
||||||
/// The length of this match, such that the starting position of the match
|
|
||||||
/// is `end - len`.
|
|
||||||
///
|
|
||||||
/// We use length here because, other than the pattern id, the only
|
|
||||||
/// information about each pattern that the automaton stores is its length.
|
|
||||||
/// So using the length here is just a bit more natural. But it isn't
|
|
||||||
/// technically required.
|
|
||||||
len: usize,
|
|
||||||
/// The end offset of the match, exclusive.
|
|
||||||
end: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Match {
|
|
||||||
/// Returns the identifier of the pattern that matched.
|
|
||||||
///
|
|
||||||
/// The identifier of a pattern is derived from the position in which it
|
|
||||||
/// was originally inserted into the corresponding automaton. The first
|
|
||||||
/// pattern has identifier `0`, and each subsequent pattern is `1`, `2`
|
|
||||||
/// and so on.
|
|
||||||
#[inline]
|
|
||||||
pub fn pattern(&self) -> usize {
|
|
||||||
self.pattern
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The starting position of the match.
|
|
||||||
#[inline]
|
|
||||||
pub fn start(&self) -> usize {
|
|
||||||
self.end - self.len
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The ending position of the match.
|
|
||||||
#[inline]
|
|
||||||
pub fn end(&self) -> usize {
|
|
||||||
self.end
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true if and only if this match is empty. That is, when
|
|
||||||
/// `start() == end()`.
|
|
||||||
///
|
|
||||||
/// An empty match can only be returned when the empty string was among
|
|
||||||
/// the patterns used to build the Aho-Corasick automaton.
|
|
||||||
#[inline]
|
|
||||||
pub fn is_empty(&self) -> bool {
|
|
||||||
self.len == 0
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn increment(&self, by: usize) -> Match {
|
|
||||||
Match { pattern: self.pattern, len: self.len, end: self.end + by }
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn from_span(id: usize, start: usize, end: usize) -> Match {
|
|
||||||
Match { pattern: id, len: end - start, end }
|
|
||||||
}
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1 @@
|
||||||
|
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"cf15db60c9989b860ea874785f1d8b1b5350a28501cd169831042837ee6f350c","DESIGN.md":"9065f33d818d1562244d36dc4781e2a351108030cee17f11c2ba512ca7b4c27e","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"88a0a32dce081f2b1a5c71f2208c155961b0171daf1a5a9a70371fc5cf0c4304","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"00db7f3c3e9c764f7e2e620ee28baeebaf2101191218dd06b75d7656749a096f","src/automaton.rs":"ea3fc2648e026eac9f9969b0d457e49af7b4a40044379ce010d054f22afbc98f","src/buffer.rs":"dae7ee7c1f846ca9cf115ba4949484000e1837b4fb7311f8d8c9a35011c9c26f","src/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/classes.rs":"166c9f15c9a2e370e2bc9a9e1620bb2db13df52edfde9a0db1f20144519a7e72","src/dfa.rs":"8302d809d0acbf2a9d5f947d91e28d779cc547e49256961029f1c01e3375cba9","src/error.rs":"36dbf2cefbfaa8a69186551320dbff023d3e82780a6c925e87c3e3997b967e66","src/lib.rs":"2d2ef0070fb6c4ba6695e948e60e05ed63d3e8c31de28d3c112653798f95d6d3","src/nfa.rs":"3f68abf96a1c556021f0b7011634b3273b3df7e5366c9d27a2fc63e93ffbd95d","src/packed/api.rs":"6c65dfa177b7d7b79f90a048f260bec7f817126c693b85f49704c7d2ecf5f646","src/packed/mod.rs":"29c76ad3cbb1f831140cefac7a27fb504ac4af4f454975a571965b48aad417eb","src/packed/pattern.rs":"b88c57af057997da0a5a06f4c5604a7e598c20acfc11c15cd8977727f6e1cf9c","src/packed/rabinkarp.rs":"b3242a8631ea5607163dcbb641e4ac9c6da26774378da1e51651b0ab5656b390","src/packed/teddy/README.md":"5819f40d221af93288e705eadef5393a41d7a0900881b4d676e01fd65d5adf15","src/packed/teddy/compile.rs":"5d7de6a45a84bb2322647a6de7a7b1573837b9222b16e348f023b8d47e0a5130","src/packed/teddy/mod.rs":"f63db3419b1d378929bf0bc1f0e3b909ff3c38b9f2b6e86ba4546b8f39907cd3","src/packed/teddy/runtime.rs":"0a
1250ea73159b3be6e0fa9a3f55ecedbb2cb90cb798d1709e9f5ee48f8855d5","src/packed/tests.rs":"0b52ab9eef73a1a4f141f475a9fa98e54d447104aa69acba3a7f8248ce7164b2","src/packed/vector.rs":"ab3c0535fca5f09198d58cbfae44c292aeb3ce44bc92bca36d30dc72963639fc","src/prefilter.rs":"5191cc8a273a909ca460c492357401cb664a66ed0505948e969786d655a9aed8","src/state_id.rs":"50958ca2b089d775fb4e49a64950e2f1e8a4af1772fe782ae3715a7745dcc6d7","src/tests.rs":"837bf7e7b9aa8b215a750475411730fa081370ee93e78a6f516d86280a1ab906"},"package":"7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"}
|
|
@ -0,0 +1,56 @@
|
||||||
|
"""
|
||||||
|
@generated
|
||||||
|
cargo-raze crate build file.
|
||||||
|
|
||||||
|
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||||
|
"""
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load(
|
||||||
|
"@io_bazel_rules_rust//rust:rust.bzl",
|
||||||
|
"rust_binary",
|
||||||
|
"rust_library",
|
||||||
|
"rust_test",
|
||||||
|
)
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load("@bazel_skylib//lib:selects.bzl", "selects")
|
||||||
|
|
||||||
|
package(default_visibility = [
|
||||||
|
# Public for visibility by "@raze__crate__version//" targets.
|
||||||
|
#
|
||||||
|
# Prefer access through "//third_party/cargo", which limits external
|
||||||
|
# visibility to explicit Cargo.toml dependencies.
|
||||||
|
"//visibility:public",
|
||||||
|
])
|
||||||
|
|
||||||
|
licenses([
|
||||||
|
"unencumbered", # Unlicense from expression "Unlicense OR MIT"
|
||||||
|
])
|
||||||
|
|
||||||
|
# Generated Targets
|
||||||
|
|
||||||
|
rust_library(
|
||||||
|
name = "aho_corasick",
|
||||||
|
srcs = glob(["**/*.rs"]),
|
||||||
|
crate_features = [
|
||||||
|
"default",
|
||||||
|
"std",
|
||||||
|
],
|
||||||
|
crate_root = "src/lib.rs",
|
||||||
|
crate_type = "lib",
|
||||||
|
data = [],
|
||||||
|
edition = "2015",
|
||||||
|
rustc_flags = [
|
||||||
|
"--cap-lints=allow",
|
||||||
|
],
|
||||||
|
tags = [
|
||||||
|
"cargo-raze",
|
||||||
|
"manual",
|
||||||
|
],
|
||||||
|
version = "0.7.15",
|
||||||
|
# buildifier: leave-alone
|
||||||
|
deps = [
|
||||||
|
"//third_party/cargo/vendor/memchr-2.3.4:memchr",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,42 @@
|
||||||
|
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||||
|
#
|
||||||
|
# When uploading crates to the registry Cargo will automatically
|
||||||
|
# "normalize" Cargo.toml files for maximal compatibility
|
||||||
|
# with all versions of Cargo and also rewrite `path` dependencies
|
||||||
|
# to registry (e.g., crates.io) dependencies
|
||||||
|
#
|
||||||
|
# If you believe there's an error in this file please file an
|
||||||
|
# issue against the rust-lang/cargo repository. If you're
|
||||||
|
# editing this file be aware that the upstream Cargo.toml
|
||||||
|
# will likely look very different (and much more reasonable)
|
||||||
|
|
||||||
|
[package]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "0.7.15"
|
||||||
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
|
exclude = ["/aho-corasick-debug", "/ci/*", "/.travis.yml", "/appveyor.yml"]
|
||||||
|
autotests = false
|
||||||
|
description = "Fast multiple substring searching."
|
||||||
|
homepage = "https://github.com/BurntSushi/aho-corasick"
|
||||||
|
readme = "README.md"
|
||||||
|
keywords = ["string", "search", "text", "aho", "multi"]
|
||||||
|
categories = ["text-processing"]
|
||||||
|
license = "Unlicense/MIT"
|
||||||
|
repository = "https://github.com/BurntSushi/aho-corasick"
|
||||||
|
[profile.bench]
|
||||||
|
debug = true
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
debug = true
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "aho_corasick"
|
||||||
|
[dependencies.memchr]
|
||||||
|
version = "2.2.0"
|
||||||
|
default-features = false
|
||||||
|
[dev-dependencies.doc-comment]
|
||||||
|
version = "0.3.1"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["std"]
|
||||||
|
std = ["memchr/use_std"]
|
|
@ -0,0 +1,187 @@
|
||||||
|
aho-corasick
|
||||||
|
============
|
||||||
|
A library for finding occurrences of many patterns at once with SIMD
|
||||||
|
acceleration in some cases. This library provides multiple pattern
|
||||||
|
search principally through an implementation of the
|
||||||
|
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
|
||||||
|
which builds a finite state machine for executing searches in linear time.
|
||||||
|
Features include case insensitive matching, overlapping matches, fast searching
|
||||||
|
via SIMD and optional full DFA construction and search & replace in streams.
|
||||||
|
|
||||||
|
[![Build status](https://github.com/BurntSushi/aho-corasick/workflows/ci/badge.svg)](https://github.com/BurntSushi/aho-corasick/actions)
|
||||||
|
[![](http://meritbadge.herokuapp.com/aho-corasick)](https://crates.io/crates/aho-corasick)
|
||||||
|
|
||||||
|
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||||
|
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
https://docs.rs/aho-corasick
|
||||||
|
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
Add this to your `Cargo.toml`:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[dependencies]
|
||||||
|
aho-corasick = "0.7"
|
||||||
|
```
|
||||||
|
|
||||||
|
and this to your crate root (if you're using Rust 2015):
|
||||||
|
|
||||||
|
```rust
|
||||||
|
extern crate aho_corasick;
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Example: basic searching
|
||||||
|
|
||||||
|
This example shows how to search for occurrences of multiple patterns
|
||||||
|
simultaneously. Each match includes the pattern that matched along with the
|
||||||
|
byte offsets of the match.
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use aho_corasick::AhoCorasick;
|
||||||
|
|
||||||
|
let patterns = &["apple", "maple", "Snapple"];
|
||||||
|
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns);
|
||||||
|
let mut matches = vec![];
|
||||||
|
for mat in ac.find_iter(haystack) {
|
||||||
|
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||||
|
}
|
||||||
|
assert_eq!(matches, vec![
|
||||||
|
(1, 13, 18),
|
||||||
|
(0, 28, 33),
|
||||||
|
(2, 43, 50),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Example: case insensitivity
|
||||||
|
|
||||||
|
This is like the previous example, but matches `Snapple` case insensitively
|
||||||
|
using `AhoCorasickBuilder`:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use aho_corasick::AhoCorasickBuilder;
|
||||||
|
|
||||||
|
let patterns = &["apple", "maple", "snapple"];
|
||||||
|
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||||
|
|
||||||
|
let ac = AhoCorasickBuilder::new()
|
||||||
|
.ascii_case_insensitive(true)
|
||||||
|
.build(patterns);
|
||||||
|
let mut matches = vec![];
|
||||||
|
for mat in ac.find_iter(haystack) {
|
||||||
|
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||||
|
}
|
||||||
|
assert_eq!(matches, vec![
|
||||||
|
(1, 13, 18),
|
||||||
|
(0, 28, 33),
|
||||||
|
(2, 43, 50),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Example: replacing matches in a stream
|
||||||
|
|
||||||
|
This example shows how to execute a search and replace on a stream without
|
||||||
|
loading the entire stream into memory first.
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use aho_corasick::AhoCorasick;
|
||||||
|
|
||||||
|
let patterns = &["fox", "brown", "quick"];
|
||||||
|
let replace_with = &["sloth", "grey", "slow"];
|
||||||
|
|
||||||
|
// In a real example, these might be `std::fs::File`s instead. All you need to
|
||||||
|
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
|
||||||
|
let rdr = "The quick brown fox.";
|
||||||
|
let mut wtr = vec![];
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns);
|
||||||
|
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)
|
||||||
|
.expect("stream_replace_all failed");
|
||||||
|
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Example: finding the leftmost first match
|
||||||
|
|
||||||
|
In the textbook description of Aho-Corasick, its formulation is typically
|
||||||
|
structured such that it reports all possible matches, even when they overlap
|
||||||
|
with another. In many cases, overlapping matches may not be desired, such as
|
||||||
|
the case of finding all successive non-overlapping matches like you might with
|
||||||
|
a standard regular expression.
|
||||||
|
|
||||||
|
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
|
||||||
|
this doesn't always work in the expected way, since it will report matches as
|
||||||
|
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
|
||||||
|
against the text `Samwise`. Most regex engines (that are Perl-like, or
|
||||||
|
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
|
||||||
|
algorithm modified for reporting non-overlapping matches will report `Sam`.
|
||||||
|
|
||||||
|
A novel contribution of this library is the ability to change the match
|
||||||
|
semantics of Aho-Corasick (without additional search time overhead) such that
|
||||||
|
`Samwise` is reported instead. For example, here's the standard approach:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use aho_corasick::AhoCorasick;
|
||||||
|
|
||||||
|
let patterns = &["Samwise", "Sam"];
|
||||||
|
let haystack = "Samwise";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns);
|
||||||
|
let mat = ac.find(haystack).expect("should have a match");
|
||||||
|
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
|
||||||
|
```
|
||||||
|
|
||||||
|
And now here's the leftmost-first version, which matches how a Perl-like
|
||||||
|
regex will work:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use aho_corasick::{AhoCorasickBuilder, MatchKind};
|
||||||
|
|
||||||
|
let patterns = &["Samwise", "Sam"];
|
||||||
|
let haystack = "Samwise";
|
||||||
|
|
||||||
|
let ac = AhoCorasickBuilder::new()
|
||||||
|
.match_kind(MatchKind::LeftmostFirst)
|
||||||
|
.build(patterns);
|
||||||
|
let mat = ac.find(haystack).expect("should have a match");
|
||||||
|
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
|
||||||
|
```
|
||||||
|
|
||||||
|
In addition to leftmost-first semantics, this library also supports
|
||||||
|
leftmost-longest semantics, which match the POSIX behavior of a regular
|
||||||
|
expression alternation. See `MatchKind` in the docs for more details.
|
||||||
|
|
||||||
|
|
||||||
|
### Minimum Rust version policy
|
||||||
|
|
||||||
|
This crate's minimum supported `rustc` version is `1.28.0`.
|
||||||
|
|
||||||
|
The current policy is that the minimum Rust version required to use this crate
|
||||||
|
can be increased in minor version updates. For example, if `crate 1.0` requires
|
||||||
|
Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust
|
||||||
|
1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum
|
||||||
|
version of Rust.
|
||||||
|
|
||||||
|
In general, this crate will be conservative with respect to the minimum
|
||||||
|
supported version of Rust.
|
||||||
|
|
||||||
|
|
||||||
|
### Future work
|
||||||
|
|
||||||
|
Here are some plans for the future:
|
||||||
|
|
||||||
|
* Assuming the current API is sufficient, I'd like to commit to it and release
|
||||||
|
a `1.0` version of this crate some time in the next 6-12 months.
|
||||||
|
* Support stream searching with leftmost match semantics. Currently, only
|
||||||
|
standard match semantics are supported. Getting this right seems possible,
|
||||||
|
but is tricky since the match state needs to be propagated through multiple
|
||||||
|
searches. (With standard semantics, as soon as a match is seen the search
|
||||||
|
ends.)
|
|
@ -6,7 +6,7 @@ use dfa::{self, DFA};
|
||||||
use error::Result;
|
use error::Result;
|
||||||
use nfa::{self, NFA};
|
use nfa::{self, NFA};
|
||||||
use packed;
|
use packed;
|
||||||
use prefilter::PrefilterState;
|
use prefilter::{Prefilter, PrefilterState};
|
||||||
use state_id::StateID;
|
use state_id::StateID;
|
||||||
use Match;
|
use Match;
|
||||||
|
|
||||||
|
@ -502,7 +502,7 @@ impl<S: StateID> AhoCorasick<S> {
|
||||||
/// The closure accepts three parameters: the match found, the text of
|
/// The closure accepts three parameters: the match found, the text of
|
||||||
/// the match and a string buffer with which to write the replaced text
|
/// the match and a string buffer with which to write the replaced text
|
||||||
/// (if any). If the closure returns `true`, then it continues to the next
|
/// (if any). If the closure returns `true`, then it continues to the next
|
||||||
/// match. If the closure returns false, then searching is stopped.
|
/// match. If the closure returns `false`, then searching is stopped.
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
|
@ -524,6 +524,24 @@ impl<S: StateID> AhoCorasick<S> {
|
||||||
/// });
|
/// });
|
||||||
/// assert_eq!("0 the 2 to the 0age", result);
|
/// assert_eq!("0 the 2 to the 0age", result);
|
||||||
/// ```
|
/// ```
|
||||||
|
///
|
||||||
|
/// Stopping the replacement by returning `false` (continued from the
|
||||||
|
/// example above):
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use aho_corasick::{AhoCorasickBuilder, MatchKind};
|
||||||
|
/// # let patterns = &["append", "appendage", "app"];
|
||||||
|
/// # let haystack = "append the app to the appendage";
|
||||||
|
/// # let ac = AhoCorasickBuilder::new()
|
||||||
|
/// # .match_kind(MatchKind::LeftmostFirst)
|
||||||
|
/// # .build(patterns);
|
||||||
|
/// let mut result = String::new();
|
||||||
|
/// ac.replace_all_with(haystack, &mut result, |mat, _, dst| {
|
||||||
|
/// dst.push_str(&mat.pattern().to_string());
|
||||||
|
/// mat.pattern() != 2
|
||||||
|
/// });
|
||||||
|
/// assert_eq!("0 the 2 to the appendage", result);
|
||||||
|
/// ```
|
||||||
pub fn replace_all_with<F>(
|
pub fn replace_all_with<F>(
|
||||||
&self,
|
&self,
|
||||||
haystack: &str,
|
haystack: &str,
|
||||||
|
@ -536,7 +554,9 @@ impl<S: StateID> AhoCorasick<S> {
|
||||||
for mat in self.find_iter(haystack) {
|
for mat in self.find_iter(haystack) {
|
||||||
dst.push_str(&haystack[last_match..mat.start()]);
|
dst.push_str(&haystack[last_match..mat.start()]);
|
||||||
last_match = mat.end();
|
last_match = mat.end();
|
||||||
replace_with(&mat, &haystack[mat.start()..mat.end()], dst);
|
if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) {
|
||||||
|
break;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
dst.push_str(&haystack[last_match..]);
|
dst.push_str(&haystack[last_match..]);
|
||||||
}
|
}
|
||||||
|
@ -548,7 +568,7 @@ impl<S: StateID> AhoCorasick<S> {
|
||||||
/// The closure accepts three parameters: the match found, the text of
|
/// The closure accepts three parameters: the match found, the text of
|
||||||
/// the match and a byte buffer with which to write the replaced text
|
/// the match and a byte buffer with which to write the replaced text
|
||||||
/// (if any). If the closure returns `true`, then it continues to the next
|
/// (if any). If the closure returns `true`, then it continues to the next
|
||||||
/// match. If the closure returns false, then searching is stopped.
|
/// match. If the closure returns `false`, then searching is stopped.
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
|
@ -570,6 +590,24 @@ impl<S: StateID> AhoCorasick<S> {
|
||||||
/// });
|
/// });
|
||||||
/// assert_eq!(b"0 the 2 to the 0age".to_vec(), result);
|
/// assert_eq!(b"0 the 2 to the 0age".to_vec(), result);
|
||||||
/// ```
|
/// ```
|
||||||
|
///
|
||||||
|
/// Stopping the replacement by returning `false` (continued from the
|
||||||
|
/// example above):
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use aho_corasick::{AhoCorasickBuilder, MatchKind};
|
||||||
|
/// # let patterns = &["append", "appendage", "app"];
|
||||||
|
/// # let haystack = b"append the app to the appendage";
|
||||||
|
/// # let ac = AhoCorasickBuilder::new()
|
||||||
|
/// # .match_kind(MatchKind::LeftmostFirst)
|
||||||
|
/// # .build(patterns);
|
||||||
|
/// let mut result = vec![];
|
||||||
|
/// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| {
|
||||||
|
/// dst.extend(mat.pattern().to_string().bytes());
|
||||||
|
/// mat.pattern() != 2
|
||||||
|
/// });
|
||||||
|
/// assert_eq!(b"0 the 2 to the appendage".to_vec(), result);
|
||||||
|
/// ```
|
||||||
pub fn replace_all_with_bytes<F>(
|
pub fn replace_all_with_bytes<F>(
|
||||||
&self,
|
&self,
|
||||||
haystack: &[u8],
|
haystack: &[u8],
|
||||||
|
@ -582,7 +620,9 @@ impl<S: StateID> AhoCorasick<S> {
|
||||||
for mat in self.find_iter(haystack) {
|
for mat in self.find_iter(haystack) {
|
||||||
dst.extend(&haystack[last_match..mat.start()]);
|
dst.extend(&haystack[last_match..mat.start()]);
|
||||||
last_match = mat.end();
|
last_match = mat.end();
|
||||||
replace_with(&mat, &haystack[mat.start()..mat.end()], dst);
|
if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) {
|
||||||
|
break;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
dst.extend(&haystack[last_match..]);
|
dst.extend(&haystack[last_match..]);
|
||||||
}
|
}
|
||||||
|
@ -735,9 +775,7 @@ impl<S: StateID> AhoCorasick<S> {
|
||||||
/// [`find_iter`](struct.AhoCorasick.html#method.find_iter).
|
/// [`find_iter`](struct.AhoCorasick.html#method.find_iter).
|
||||||
///
|
///
|
||||||
/// The closure accepts three parameters: the match found, the text of
|
/// The closure accepts three parameters: the match found, the text of
|
||||||
/// the match and the writer with which to write the replaced text
|
/// the match and the writer with which to write the replaced text (if any).
|
||||||
/// (if any). If the closure returns `true`, then it continues to the next
|
|
||||||
/// match. If the closure returns false, then searching is stopped.
|
|
||||||
///
|
///
|
||||||
/// After all matches are replaced, the writer is _not_ flushed.
|
/// After all matches are replaced, the writer is _not_ flushed.
|
||||||
///
|
///
|
||||||
|
@ -1037,6 +1075,24 @@ impl<S: StateID> Imp<S> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the prefilter object, if one exists, for the underlying
|
||||||
|
/// automaton.
|
||||||
|
fn prefilter(&self) -> Option<&dyn Prefilter> {
|
||||||
|
match *self {
|
||||||
|
Imp::NFA(ref nfa) => nfa.prefilter(),
|
||||||
|
Imp::DFA(ref dfa) => dfa.prefilter(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if we should attempt to use a prefilter.
|
||||||
|
fn use_prefilter(&self) -> bool {
|
||||||
|
let p = match self.prefilter() {
|
||||||
|
None => return false,
|
||||||
|
Some(p) => p,
|
||||||
|
};
|
||||||
|
!p.looks_for_non_start_of_match()
|
||||||
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn overlapping_find_at(
|
fn overlapping_find_at(
|
||||||
&self,
|
&self,
|
||||||
|
@ -1325,7 +1381,11 @@ impl<'a, R: io::Read, S: StateID> StreamChunkIter<'a, R, S> {
|
||||||
"stream searching is only supported for Standard match semantics"
|
"stream searching is only supported for Standard match semantics"
|
||||||
);
|
);
|
||||||
|
|
||||||
let prestate = PrefilterState::new(ac.max_pattern_len());
|
let prestate = if ac.imp.use_prefilter() {
|
||||||
|
PrefilterState::new(ac.max_pattern_len())
|
||||||
|
} else {
|
||||||
|
PrefilterState::disabled()
|
||||||
|
};
|
||||||
let buf = Buffer::new(ac.imp.max_pattern_len());
|
let buf = Buffer::new(ac.imp.max_pattern_len());
|
||||||
let state_id = ac.imp.start_state();
|
let state_id = ac.imp.start_state();
|
||||||
StreamChunkIter {
|
StreamChunkIter {
|
||||||
|
@ -1809,7 +1869,7 @@ impl AhoCorasickBuilder {
|
||||||
/// finite automaton (NFA) is used instead.
|
/// finite automaton (NFA) is used instead.
|
||||||
///
|
///
|
||||||
/// The main benefit to a DFA is that it can execute searches more quickly
|
/// The main benefit to a DFA is that it can execute searches more quickly
|
||||||
/// than a DFA (perhaps 2-4 times as fast). The main drawback is that the
|
/// than a NFA (perhaps 2-4 times as fast). The main drawback is that the
|
||||||
/// DFA uses more space and can take much longer to build.
|
/// DFA uses more space and can take much longer to build.
|
||||||
///
|
///
|
||||||
/// Enabling this option does not change the time complexity for
|
/// Enabling this option does not change the time complexity for
|
|
@ -0,0 +1,132 @@
|
||||||
|
use std::cmp;
|
||||||
|
use std::io;
|
||||||
|
use std::ptr;
|
||||||
|
|
||||||
|
/// The default buffer capacity that we use for the stream buffer.
|
||||||
|
const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB
|
||||||
|
|
||||||
|
/// A fairly simple roll buffer for supporting stream searches.
|
||||||
|
///
|
||||||
|
/// This buffer acts as a temporary place to store a fixed amount of data when
|
||||||
|
/// reading from a stream. Its central purpose is to allow "rolling" some
|
||||||
|
/// suffix of the data to the beginning of the buffer before refilling it with
|
||||||
|
/// more data from the stream. For example, let's say we are trying to match
|
||||||
|
/// "foobar" on a stream. When we report the match, we'd like to not only
|
||||||
|
/// report the correct offsets at which the match occurs, but also the matching
|
||||||
|
/// bytes themselves. So let's say our stream is a file with the following
|
||||||
|
/// contents: `test test foobar test test`. Now assume that we happen to read
|
||||||
|
/// the aforementioned file in two chunks: `test test foo` and `bar test test`.
|
||||||
|
/// Naively, it would not be possible to report a single contiguous `foobar`
|
||||||
|
/// match, but this roll buffer allows us to do that. Namely, after the second
|
||||||
|
/// read, the contents of the buffer should be `st foobar test test`, where the
|
||||||
|
/// search should ultimately resume immediately after `foo`. (The prefix `st `
|
||||||
|
/// is included because the roll buffer saves N bytes at the end of the buffer,
|
||||||
|
/// where N is the maximum possible length of a match.)
|
||||||
|
///
|
||||||
|
/// A lot of the logic for dealing with this is unfortunately split out between
|
||||||
|
/// this roll buffer and the `StreamChunkIter`.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Buffer {
|
||||||
|
/// The raw buffer contents. This has a fixed size and never increases.
|
||||||
|
buf: Vec<u8>,
|
||||||
|
/// The minimum size of the buffer, which is equivalent to the maximum
|
||||||
|
/// possible length of a match. This corresponds to the amount that we
|
||||||
|
/// roll
|
||||||
|
min: usize,
|
||||||
|
/// The end of the contents of this buffer.
|
||||||
|
end: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Buffer {
|
||||||
|
/// Create a new buffer for stream searching. The minimum buffer length
|
||||||
|
/// given should be the size of the maximum possible match length.
|
||||||
|
pub fn new(min_buffer_len: usize) -> Buffer {
|
||||||
|
let min = cmp::max(1, min_buffer_len);
|
||||||
|
// The minimum buffer amount is also the amount that we roll our
|
||||||
|
// buffer in order to support incremental searching. To this end,
|
||||||
|
// our actual capacity needs to be at least 1 byte bigger than our
|
||||||
|
// minimum amount, otherwise we won't have any overlap. In actuality,
|
||||||
|
// we want our buffer to be a bit bigger than that for performance
|
||||||
|
// reasons, so we set a lower bound of `8 * min`.
|
||||||
|
//
|
||||||
|
// TODO: It would be good to find a way to test the streaming
|
||||||
|
// implementation with the minimal buffer size. For now, we just
|
||||||
|
// uncomment out the next line and comment out the subsequent line.
|
||||||
|
// let capacity = 1 + min;
|
||||||
|
let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
|
||||||
|
Buffer { buf: vec![0; capacity], min, end: 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the contents of this buffer.
|
||||||
|
#[inline]
|
||||||
|
pub fn buffer(&self) -> &[u8] {
|
||||||
|
&self.buf[..self.end]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the minimum size of the buffer. The only way a buffer may be
|
||||||
|
/// smaller than this is if the stream itself contains less than the
|
||||||
|
/// minimum buffer amount.
|
||||||
|
#[inline]
|
||||||
|
pub fn min_buffer_len(&self) -> usize {
|
||||||
|
self.min
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the total length of the contents in the buffer.
|
||||||
|
#[inline]
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.end
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return all free capacity in this buffer.
|
||||||
|
fn free_buffer(&mut self) -> &mut [u8] {
|
||||||
|
&mut self.buf[self.end..]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Refill the contents of this buffer by reading as much as possible into
|
||||||
|
/// this buffer's free capacity. If no more bytes could be read, then this
|
||||||
|
/// returns false. Otherwise, this reads until it has filled the buffer
|
||||||
|
/// past the minimum amount.
|
||||||
|
pub fn fill<R: io::Read>(&mut self, mut rdr: R) -> io::Result<bool> {
|
||||||
|
let mut readany = false;
|
||||||
|
loop {
|
||||||
|
let readlen = rdr.read(self.free_buffer())?;
|
||||||
|
if readlen == 0 {
|
||||||
|
return Ok(readany);
|
||||||
|
}
|
||||||
|
readany = true;
|
||||||
|
self.end += readlen;
|
||||||
|
if self.len() >= self.min {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Roll the contents of the buffer so that the suffix of this buffer is
|
||||||
|
/// moved to the front and all other contents are dropped. The size of the
|
||||||
|
/// suffix corresponds precisely to the minimum buffer length.
|
||||||
|
///
|
||||||
|
/// This should only be called when the entire contents of this buffer have
|
||||||
|
/// been searched.
|
||||||
|
pub fn roll(&mut self) {
|
||||||
|
let roll_start = self
|
||||||
|
.end
|
||||||
|
.checked_sub(self.min)
|
||||||
|
.expect("buffer capacity should be bigger than minimum amount");
|
||||||
|
let roll_len = self.min;
|
||||||
|
|
||||||
|
assert!(roll_start + roll_len <= self.end);
|
||||||
|
unsafe {
|
||||||
|
// SAFETY: A buffer contains Copy data, so there's no problem
|
||||||
|
// moving it around. Safety also depends on our indices being in
|
||||||
|
// bounds, which they always should be, given the assert above.
|
||||||
|
//
|
||||||
|
// TODO: Switch to [T]::copy_within once our MSRV is high enough.
|
||||||
|
ptr::copy(
|
||||||
|
self.buf[roll_start..].as_ptr(),
|
||||||
|
self.buf.as_mut_ptr(),
|
||||||
|
roll_len,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
self.end = roll_len;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,713 @@
|
||||||
|
use std::mem::size_of;
|
||||||
|
|
||||||
|
use ahocorasick::MatchKind;
|
||||||
|
use automaton::Automaton;
|
||||||
|
use classes::ByteClasses;
|
||||||
|
use error::Result;
|
||||||
|
use nfa::{PatternID, PatternLength, NFA};
|
||||||
|
use prefilter::{Prefilter, PrefilterObj, PrefilterState};
|
||||||
|
use state_id::{dead_id, fail_id, premultiply_overflow_error, StateID};
|
||||||
|
use Match;
|
||||||
|
|
||||||
|
/// The runtime representation of a DFA, with one variant per combination of
/// the two space/time options applied by the `Builder`: byte classes and
/// premultiplied state identifiers.
#[derive(Clone, Debug)]
pub enum DFA<S> {
    /// Dense table with one column per byte value; plain state ids.
    Standard(Standard<S>),
    /// One column per byte equivalence class; plain state ids.
    ByteClass(ByteClass<S>),
    /// Dense 256-column table; state ids premultiplied by the row width.
    Premultiplied(Premultiplied<S>),
    /// Byte classes combined with premultiplied state ids.
    PremultipliedByteClass(PremultipliedByteClass<S>),
}
|
||||||
|
|
||||||
|
impl<S: StateID> DFA<S> {
    /// Return the internal representation shared by every DFA variant.
    fn repr(&self) -> &Repr<S> {
        match *self {
            DFA::Standard(ref dfa) => dfa.repr(),
            DFA::ByteClass(ref dfa) => dfa.repr(),
            DFA::Premultiplied(ref dfa) => dfa.repr(),
            DFA::PremultipliedByteClass(ref dfa) => dfa.repr(),
        }
    }

    /// Return the match semantics this DFA was built with.
    pub fn match_kind(&self) -> &MatchKind {
        &self.repr().match_kind
    }

    /// Return the number of bytes of heap used by this DFA.
    pub fn heap_bytes(&self) -> usize {
        self.repr().heap_bytes
    }

    /// Return the length, in bytes, of the longest pattern in this DFA.
    pub fn max_pattern_len(&self) -> usize {
        self.repr().max_pattern_len
    }

    /// Return the total number of patterns in this DFA, including patterns
    /// that may never match.
    pub fn pattern_count(&self) -> usize {
        self.repr().pattern_count
    }

    /// Return the prefilter used to accelerate searches, if one is set.
    pub fn prefilter(&self) -> Option<&dyn Prefilter> {
        self.repr().prefilter.as_ref().map(|p| p.as_ref())
    }

    /// Return the identifier of this DFA's start state.
    pub fn start_state(&self) -> S {
        self.repr().start_id
    }

    /// Execute an overlapping search, dispatching to the concrete variant's
    /// implementation. `state_id` and `match_index` carry search state across
    /// calls so the search can be resumed.
    #[inline(always)]
    pub fn overlapping_find_at(
        &self,
        prestate: &mut PrefilterState,
        haystack: &[u8],
        at: usize,
        state_id: &mut S,
        match_index: &mut usize,
    ) -> Option<Match> {
        match *self {
            DFA::Standard(ref dfa) => dfa.overlapping_find_at(
                prestate,
                haystack,
                at,
                state_id,
                match_index,
            ),
            DFA::ByteClass(ref dfa) => dfa.overlapping_find_at(
                prestate,
                haystack,
                at,
                state_id,
                match_index,
            ),
            DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at(
                prestate,
                haystack,
                at,
                state_id,
                match_index,
            ),
            DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at(
                prestate,
                haystack,
                at,
                state_id,
                match_index,
            ),
        }
    }

    /// Execute an earliest-match search starting at `at`, dispatching to the
    /// concrete variant's implementation.
    #[inline(always)]
    pub fn earliest_find_at(
        &self,
        prestate: &mut PrefilterState,
        haystack: &[u8],
        at: usize,
        state_id: &mut S,
    ) -> Option<Match> {
        match *self {
            DFA::Standard(ref dfa) => {
                dfa.earliest_find_at(prestate, haystack, at, state_id)
            }
            DFA::ByteClass(ref dfa) => {
                dfa.earliest_find_at(prestate, haystack, at, state_id)
            }
            DFA::Premultiplied(ref dfa) => {
                dfa.earliest_find_at(prestate, haystack, at, state_id)
            }
            DFA::PremultipliedByteClass(ref dfa) => {
                dfa.earliest_find_at(prestate, haystack, at, state_id)
            }
        }
    }

    /// Execute a search starting at `at` without exposing intermediate state
    /// to the caller, dispatching to the concrete variant's implementation.
    #[inline(always)]
    pub fn find_at_no_state(
        &self,
        prestate: &mut PrefilterState,
        haystack: &[u8],
        at: usize,
    ) -> Option<Match> {
        match *self {
            DFA::Standard(ref dfa) => {
                dfa.find_at_no_state(prestate, haystack, at)
            }
            DFA::ByteClass(ref dfa) => {
                dfa.find_at_no_state(prestate, haystack, at)
            }
            DFA::Premultiplied(ref dfa) => {
                dfa.find_at_no_state(prestate, haystack, at)
            }
            DFA::PremultipliedByteClass(ref dfa) => {
                dfa.find_at_no_state(prestate, haystack, at)
            }
        }
    }
}
|
||||||
|
|
||||||
|
/// A DFA whose transition table has one column per byte value (256 per
/// state) and unmultiplied state identifiers; see its `next_state`.
#[derive(Clone, Debug)]
pub struct Standard<S>(Repr<S>);
|
||||||
|
|
||||||
|
impl<S: StateID> Standard<S> {
    /// Return the underlying DFA representation.
    fn repr(&self) -> &Repr<S> {
        &self.0
    }
}
|
||||||
|
|
||||||
|
// All Automaton methods except `next_state` delegate to the shared `Repr`.
impl<S: StateID> Automaton for Standard<S> {
    type ID = S;

    fn match_kind(&self) -> &MatchKind {
        &self.repr().match_kind
    }

    fn anchored(&self) -> bool {
        self.repr().anchored
    }

    fn prefilter(&self) -> Option<&dyn Prefilter> {
        self.repr().prefilter.as_ref().map(|p| p.as_ref())
    }

    fn start_state(&self) -> S {
        self.repr().start_id
    }

    fn is_valid(&self, id: S) -> bool {
        // Ids are plain state indices here, so a direct bound check works.
        id.to_usize() < self.repr().state_count
    }

    fn is_match_state(&self, id: S) -> bool {
        self.repr().is_match_state(id)
    }

    fn is_match_or_dead_state(&self, id: S) -> bool {
        self.repr().is_match_or_dead_state(id)
    }

    fn get_match(
        &self,
        id: S,
        match_index: usize,
        end: usize,
    ) -> Option<Match> {
        self.repr().get_match(id, match_index, end)
    }

    fn match_count(&self, id: S) -> usize {
        self.repr().match_count(id)
    }

    fn next_state(&self, current: S, input: u8) -> S {
        // Row-major dense table: each state owns a row of 256 transitions,
        // indexed directly by the input byte.
        let o = current.to_usize() * 256 + input as usize;
        self.repr().trans[o]
    }
}
|
||||||
|
|
||||||
|
/// A DFA whose transition table has one column per byte equivalence class
/// (rather than per byte value) and unmultiplied state identifiers.
#[derive(Clone, Debug)]
pub struct ByteClass<S>(Repr<S>);
|
||||||
|
|
||||||
|
impl<S: StateID> ByteClass<S> {
    /// Return the underlying DFA representation.
    fn repr(&self) -> &Repr<S> {
        &self.0
    }
}
|
||||||
|
|
||||||
|
// All Automaton methods except `next_state` delegate to the shared `Repr`.
impl<S: StateID> Automaton for ByteClass<S> {
    type ID = S;

    fn match_kind(&self) -> &MatchKind {
        &self.repr().match_kind
    }

    fn anchored(&self) -> bool {
        self.repr().anchored
    }

    fn prefilter(&self) -> Option<&dyn Prefilter> {
        self.repr().prefilter.as_ref().map(|p| p.as_ref())
    }

    fn start_state(&self) -> S {
        self.repr().start_id
    }

    fn is_valid(&self, id: S) -> bool {
        // Ids are plain state indices here, so a direct bound check works.
        id.to_usize() < self.repr().state_count
    }

    fn is_match_state(&self, id: S) -> bool {
        self.repr().is_match_state(id)
    }

    fn is_match_or_dead_state(&self, id: S) -> bool {
        self.repr().is_match_or_dead_state(id)
    }

    fn get_match(
        &self,
        id: S,
        match_index: usize,
        end: usize,
    ) -> Option<Match> {
        self.repr().get_match(id, match_index, end)
    }

    fn match_count(&self, id: S) -> usize {
        self.repr().match_count(id)
    }

    fn next_state(&self, current: S, input: u8) -> S {
        // Map the input byte to its equivalence class before indexing the
        // state's (alphabet_len-wide) row of the transition table.
        let alphabet_len = self.repr().byte_classes.alphabet_len();
        let input = self.repr().byte_classes.get(input);
        let o = current.to_usize() * alphabet_len + input as usize;
        self.repr().trans[o]
    }
}
|
||||||
|
|
||||||
|
/// A DFA with a dense 256-column transition table whose state identifiers
/// have been premultiplied by the row width, so `next_state` is a single
/// add-and-index with no multiplication.
#[derive(Clone, Debug)]
pub struct Premultiplied<S>(Repr<S>);
|
||||||
|
|
||||||
|
impl<S: StateID> Premultiplied<S> {
    /// Return the underlying DFA representation.
    fn repr(&self) -> &Repr<S> {
        &self.0
    }
}
|
||||||
|
|
||||||
|
// State ids here are premultiplied by 256 (the dense row width), so methods
// that need a plain state index divide by 256 to recover it.
impl<S: StateID> Automaton for Premultiplied<S> {
    type ID = S;

    fn match_kind(&self) -> &MatchKind {
        &self.repr().match_kind
    }

    fn anchored(&self) -> bool {
        self.repr().anchored
    }

    fn prefilter(&self) -> Option<&dyn Prefilter> {
        self.repr().prefilter.as_ref().map(|p| p.as_ref())
    }

    fn start_state(&self) -> S {
        self.repr().start_id
    }

    fn is_valid(&self, id: S) -> bool {
        // Recover the plain state index from the premultiplied id.
        (id.to_usize() / 256) < self.repr().state_count
    }

    fn is_match_state(&self, id: S) -> bool {
        self.repr().is_match_state(id)
    }

    fn is_match_or_dead_state(&self, id: S) -> bool {
        self.repr().is_match_or_dead_state(id)
    }

    fn get_match(
        &self,
        id: S,
        match_index: usize,
        end: usize,
    ) -> Option<Match> {
        if id > self.max_match {
            return None;
        }
        // `matches` is indexed by plain state index, not premultiplied id.
        self.repr()
            .matches
            .get(id.to_usize() / 256)
            .and_then(|m| m.get(match_index))
            .map(|&(id, len)| Match { pattern: id, len, end })
    }

    fn match_count(&self, id: S) -> usize {
        let o = id.to_usize() / 256;
        self.repr().matches[o].len()
    }

    fn next_state(&self, current: S, input: u8) -> S {
        // `current` is already `state_index * 256`, so the transition is a
        // single addition — no multiply on the hot path.
        let o = current.to_usize() + input as usize;
        self.repr().trans[o]
    }
}
|
||||||
|
|
||||||
|
/// A DFA combining both options: byte equivalence classes for narrow rows
/// and state identifiers premultiplied by the alphabet size.
#[derive(Clone, Debug)]
pub struct PremultipliedByteClass<S>(Repr<S>);
|
||||||
|
|
||||||
|
impl<S: StateID> PremultipliedByteClass<S> {
    /// Return the underlying DFA representation.
    fn repr(&self) -> &Repr<S> {
        &self.0
    }
}
|
||||||
|
|
||||||
|
// State ids here are premultiplied by the alphabet length (the number of
// byte classes), so methods needing a plain state index divide it back out.
impl<S: StateID> Automaton for PremultipliedByteClass<S> {
    type ID = S;

    fn match_kind(&self) -> &MatchKind {
        &self.repr().match_kind
    }

    fn anchored(&self) -> bool {
        self.repr().anchored
    }

    fn prefilter(&self) -> Option<&dyn Prefilter> {
        self.repr().prefilter.as_ref().map(|p| p.as_ref())
    }

    fn start_state(&self) -> S {
        self.repr().start_id
    }

    fn is_valid(&self, id: S) -> bool {
        // Recover the plain state index from the premultiplied id.
        (id.to_usize() / self.repr().alphabet_len()) < self.repr().state_count
    }

    fn is_match_state(&self, id: S) -> bool {
        self.repr().is_match_state(id)
    }

    fn is_match_or_dead_state(&self, id: S) -> bool {
        self.repr().is_match_or_dead_state(id)
    }

    fn get_match(
        &self,
        id: S,
        match_index: usize,
        end: usize,
    ) -> Option<Match> {
        if id > self.repr().max_match {
            return None;
        }
        // `matches` is indexed by plain state index, not premultiplied id.
        self.repr()
            .matches
            .get(id.to_usize() / self.repr().alphabet_len())
            .and_then(|m| m.get(match_index))
            .map(|&(id, len)| Match { pattern: id, len, end })
    }

    fn match_count(&self, id: S) -> usize {
        let o = id.to_usize() / self.repr().alphabet_len();
        self.repr().matches[o].len()
    }

    fn next_state(&self, current: S, input: u8) -> S {
        // `current` is already `state_index * alphabet_len`; map the byte to
        // its class and add.
        let input = self.repr().byte_classes.get(input);
        let o = current.to_usize() + input as usize;
        self.repr().trans[o]
    }
}
|
||||||
|
|
||||||
|
/// The internal representation shared by every DFA variant.
#[derive(Clone, Debug)]
pub struct Repr<S> {
    /// The match semantics to use when reporting matches.
    match_kind: MatchKind,
    /// Whether searches with this automaton are anchored; copied from the
    /// NFA by the builder.
    anchored: bool,
    /// Whether state ids in `trans` have been premultiplied by the alphabet
    /// length (see `premultiply`).
    premultiplied: bool,
    /// The identifier of the start state.
    start_id: S,
    /// The length, in bytes, of the longest pattern in this automaton. This
    /// information is useful for keeping correct buffer sizes when searching
    /// on streams.
    max_pattern_len: usize,
    /// The total number of patterns added to this automaton. This includes
    /// patterns that may never match.
    pattern_count: usize,
    /// The total number of states in this automaton.
    state_count: usize,
    /// The largest state id corresponding to a match state; valid after
    /// `shuffle_match_states` has moved all match states to the front.
    max_match: S,
    /// The number of bytes of heap used by this DFA's transition table.
    heap_bytes: usize,
    /// A prefilter for quickly detecting candidate matches, if pertinent.
    prefilter: Option<PrefilterObj>,
    /// The mapping from bytes to equivalence classes.
    byte_classes: ByteClasses,
    /// The transition table, row-major: one row of `alphabet_len()` entries
    /// per state.
    trans: Vec<S>,
    /// For each state (by plain index), the `(pattern id, pattern length)`
    /// pairs of patterns that match when that state is entered.
    matches: Vec<Vec<(PatternID, PatternLength)>>,
}
|
||||||
|
|
||||||
|
impl<S: StateID> Repr<S> {
    /// Returns the total alphabet size for this DFA.
    ///
    /// If byte classes are enabled, then this corresponds to the number of
    /// equivalence classes. If they are disabled, then this is always 256.
    fn alphabet_len(&self) -> usize {
        self.byte_classes.alphabet_len()
    }

    /// Returns true only if the given state is a match state.
    fn is_match_state(&self, id: S) -> bool {
        // Match states are shuffled to the low ids (after the dead state),
        // so this is a pair of comparisons with no memory access.
        id <= self.max_match && id > dead_id()
    }

    /// Returns true only if the given state is either a dead state or a match
    /// state.
    fn is_match_or_dead_state(&self, id: S) -> bool {
        id <= self.max_match
    }

    /// Get the ith match for the given state, where the end position of a
    /// match was found at `end`.
    ///
    /// # Panics
    ///
    /// The caller must ensure that the given state identifier is valid,
    /// otherwise this may panic. The `match_index` need not be valid. That is,
    /// if the given state has no matches then this returns `None`.
    fn get_match(
        &self,
        id: S,
        match_index: usize,
        end: usize,
    ) -> Option<Match> {
        if id > self.max_match {
            return None;
        }
        self.matches
            .get(id.to_usize())
            .and_then(|m| m.get(match_index))
            .map(|&(id, len)| Match { pattern: id, len, end })
    }

    /// Return the total number of matches for the given state.
    ///
    /// # Panics
    ///
    /// The caller must ensure that the given identifier is valid, or else
    /// this panics.
    fn match_count(&self, id: S) -> usize {
        self.matches[id.to_usize()].len()
    }

    /// Get the next state given `from` as the current state and `byte` as the
    /// current input byte.
    fn next_state(&self, from: S, byte: u8) -> S {
        let alphabet_len = self.alphabet_len();
        let byte = self.byte_classes.get(byte);
        self.trans[from.to_usize() * alphabet_len + byte as usize]
    }

    /// Set the `byte` transition for the `from` state to point to `to`.
    fn set_next_state(&mut self, from: S, byte: u8, to: S) {
        let alphabet_len = self.alphabet_len();
        let byte = self.byte_classes.get(byte);
        self.trans[from.to_usize() * alphabet_len + byte as usize] = to;
    }

    /// Swap the given states in place.
    fn swap_states(&mut self, id1: S, id2: S) {
        // Premultiplied ids are byte offsets, not indices, so this indexing
        // scheme would be wrong after premultiplication.
        assert!(!self.premultiplied, "can't swap states in premultiplied DFA");

        let o1 = id1.to_usize() * self.alphabet_len();
        let o2 = id2.to_usize() * self.alphabet_len();
        for b in 0..self.alphabet_len() {
            self.trans.swap(o1 + b, o2 + b);
        }
        self.matches.swap(id1.to_usize(), id2.to_usize());
    }

    /// This routine shuffles all match states in this DFA to the beginning
    /// of the DFA such that every non-match state appears after every match
    /// state. (With one exception: the special fail and dead states remain as
    /// the first two states.)
    ///
    /// The purpose of doing this shuffling is to avoid an extra conditional
    /// in the search loop, and in particular, detecting whether a state is a
    /// match or not does not need to access any memory.
    ///
    /// This updates `self.max_match` to point to the last matching state as
    /// well as `self.start` if the starting state was moved.
    fn shuffle_match_states(&mut self) {
        assert!(
            !self.premultiplied,
            "cannot shuffle match states of premultiplied DFA"
        );

        if self.state_count <= 1 {
            return;
        }

        // Advance past the leading run of states that are already match
        // states; `first_non_match` ends on the first non-match state.
        let mut first_non_match = self.start_id.to_usize();
        while first_non_match < self.state_count
            && self.matches[first_non_match].len() > 0
        {
            first_non_match += 1;
        }

        // `swaps[i]` records where state `i` moved to (`fail_id()` means it
        // did not move), so transitions can be patched up afterwards.
        let mut swaps: Vec<S> = vec![fail_id(); self.state_count];
        let mut cur = self.state_count - 1;
        while cur > first_non_match {
            if self.matches[cur].len() > 0 {
                self.swap_states(
                    S::from_usize(cur),
                    S::from_usize(first_non_match),
                );
                swaps[cur] = S::from_usize(first_non_match);
                swaps[first_non_match] = S::from_usize(cur);

                first_non_match += 1;
                while first_non_match < cur
                    && self.matches[first_non_match].len() > 0
                {
                    first_non_match += 1;
                }
            }
            cur -= 1;
        }
        // Rewrite every transition that targeted a moved state.
        for id in (0..self.state_count).map(S::from_usize) {
            let alphabet_len = self.alphabet_len();
            let offset = id.to_usize() * alphabet_len;
            for next in &mut self.trans[offset..offset + alphabet_len] {
                if swaps[next.to_usize()] != fail_id() {
                    *next = swaps[next.to_usize()];
                }
            }
        }
        if swaps[self.start_id.to_usize()] != fail_id() {
            self.start_id = swaps[self.start_id.to_usize()];
        }
        self.max_match = S::from_usize(first_non_match - 1);
    }

    /// Premultiply every state identifier in the transition table by the
    /// alphabet length, so `next_state` becomes a single add-and-index.
    /// Returns an error if the largest premultiplied id would overflow `S`.
    /// This is a no-op if already premultiplied or the DFA is trivial.
    fn premultiply(&mut self) -> Result<()> {
        if self.premultiplied || self.state_count <= 1 {
            return Ok(());
        }

        let alpha_len = self.alphabet_len();
        premultiply_overflow_error(
            S::from_usize(self.state_count - 1),
            alpha_len,
        )?;

        // States 0 and 1 (fail/dead) are skipped; their rows stay put.
        for id in (2..self.state_count).map(S::from_usize) {
            let offset = id.to_usize() * alpha_len;
            for next in &mut self.trans[offset..offset + alpha_len] {
                if *next == dead_id() {
                    continue;
                }
                *next = S::from_usize(next.to_usize() * alpha_len);
            }
        }
        self.premultiplied = true;
        self.start_id = S::from_usize(self.start_id.to_usize() * alpha_len);
        self.max_match = S::from_usize(self.max_match.to_usize() * alpha_len);
        Ok(())
    }

    /// Computes the total amount of heap used by this DFA in bytes.
    fn calculate_size(&mut self) {
        let mut size = (self.trans.len() * size_of::<S>())
            + (self.matches.len()
                * size_of::<Vec<(PatternID, PatternLength)>>());
        for state_matches in &self.matches {
            size +=
                state_matches.len() * size_of::<(PatternID, PatternLength)>();
        }
        size += self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes());
        self.heap_bytes = size;
    }
}
|
||||||
|
|
||||||
|
/// A builder for configuring the determinization of an NFA into a DFA.
|
||||||
|
#[derive(Clone, Debug)]
pub struct Builder {
    /// Whether to premultiply state identifiers (enabled by default).
    premultiply: bool,
    /// Whether to compress the alphabet with byte equivalence classes
    /// (enabled by default).
    byte_classes: bool,
}
|
||||||
|
|
||||||
|
impl Builder {
    /// Create a new builder for a DFA.
    pub fn new() -> Builder {
        Builder { premultiply: true, byte_classes: true }
    }

    /// Build a DFA from the given NFA.
    ///
    /// This returns an error if the state identifiers exceed their
    /// representation size. This can only happen when state ids are
    /// premultiplied (which is enabled by default).
    pub fn build<S: StateID>(&self, nfa: &NFA<S>) -> Result<DFA<S>> {
        let byte_classes = if self.byte_classes {
            nfa.byte_classes().clone()
        } else {
            // Singleton classes make the alphabet the full 256 byte values.
            ByteClasses::singletons()
        };
        let alphabet_len = byte_classes.alphabet_len();
        let trans = vec![fail_id(); alphabet_len * nfa.state_len()];
        let matches = vec![vec![]; nfa.state_len()];
        let mut repr = Repr {
            match_kind: nfa.match_kind().clone(),
            anchored: nfa.anchored(),
            premultiplied: false,
            start_id: nfa.start_state(),
            max_pattern_len: nfa.max_pattern_len(),
            pattern_count: nfa.pattern_count(),
            state_count: nfa.state_len(),
            max_match: fail_id(),
            heap_bytes: 0,
            prefilter: nfa.prefilter_obj().map(|p| p.clone()),
            byte_classes: byte_classes.clone(),
            trans,
            matches,
        };
        // Determinize: fill each state's row, resolving NFA failure
        // transitions eagerly (memoized against already-built DFA rows).
        for id in (0..nfa.state_len()).map(S::from_usize) {
            repr.matches[id.to_usize()].extend_from_slice(nfa.matches(id));

            let fail = nfa.failure_transition(id);
            nfa.iter_all_transitions(&byte_classes, id, |b, mut next| {
                if next == fail_id() {
                    next = nfa_next_state_memoized(nfa, &repr, id, fail, b);
                }
                repr.set_next_state(id, b, next);
            });
        }
        // Shuffle/size before premultiplying; both require plain state ids.
        repr.shuffle_match_states();
        repr.calculate_size();
        if self.premultiply {
            repr.premultiply()?;
            if byte_classes.is_singleton() {
                Ok(DFA::Premultiplied(Premultiplied(repr)))
            } else {
                Ok(DFA::PremultipliedByteClass(PremultipliedByteClass(repr)))
            }
        } else {
            if byte_classes.is_singleton() {
                Ok(DFA::Standard(Standard(repr)))
            } else {
                Ok(DFA::ByteClass(ByteClass(repr)))
            }
        }
    }

    /// Whether to use byte classes in the DFA.
    pub fn byte_classes(&mut self, yes: bool) -> &mut Builder {
        self.byte_classes = yes;
        self
    }

    /// Whether to premultiply state identifiers in the DFA.
    pub fn premultiply(&mut self, yes: bool) -> &mut Builder {
        self.premultiply = yes;
        self
    }
}
|
||||||
|
|
||||||
|
/// This returns the next NFA transition (including resolving failure
|
||||||
|
/// transitions), except once it sees a state id less than the id of the DFA
|
||||||
|
/// state that is currently being populated, then we no longer need to follow
|
||||||
|
/// failure transitions and can instead query the pre-computed state id from
|
||||||
|
/// the DFA itself.
|
||||||
|
///
|
||||||
|
/// In general, this should only be called when a failure transition is seen.
|
||||||
|
fn nfa_next_state_memoized<S: StateID>(
|
||||||
|
nfa: &NFA<S>,
|
||||||
|
dfa: &Repr<S>,
|
||||||
|
populating: S,
|
||||||
|
mut current: S,
|
||||||
|
input: u8,
|
||||||
|
) -> S {
|
||||||
|
loop {
|
||||||
|
if current < populating {
|
||||||
|
return dfa.next_state(current, input);
|
||||||
|
}
|
||||||
|
let next = nfa.next_state(current, input);
|
||||||
|
if next != fail_id() {
|
||||||
|
return next;
|
||||||
|
}
|
||||||
|
current = nfa.failure_transition(current);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,298 @@
|
||||||
|
/*!
|
||||||
|
A library for finding occurrences of many patterns at once. This library
|
||||||
|
provides multiple pattern search principally through an implementation of the
|
||||||
|
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
|
||||||
|
which builds a fast finite state machine for executing searches in linear time.
|
||||||
|
|
||||||
|
Additionally, this library provides a number of configuration options for
|
||||||
|
building the automaton that permit controlling the space versus time trade
|
||||||
|
off. Other features include simple ASCII case insensitive matching, finding
|
||||||
|
overlapping matches, replacements, searching streams and even searching and
|
||||||
|
replacing text in streams.
|
||||||
|
|
||||||
|
Finally, unlike all other (known) Aho-Corasick implementations, this one
|
||||||
|
supports enabling
|
||||||
|
[leftmost-first](enum.MatchKind.html#variant.LeftmostFirst)
|
||||||
|
or
|
||||||
|
[leftmost-longest](enum.MatchKind.html#variant.LeftmostLongest)
|
||||||
|
match semantics, using a (seemingly) novel alternative construction algorithm.
|
||||||
|
For more details on what match semantics means, see the
|
||||||
|
[`MatchKind`](enum.MatchKind.html)
|
||||||
|
type.
|
||||||
|
|
||||||
|
# Overview
|
||||||
|
|
||||||
|
This section gives a brief overview of the primary types in this crate:
|
||||||
|
|
||||||
|
* [`AhoCorasick`](struct.AhoCorasick.html) is the primary type and represents
|
||||||
|
an Aho-Corasick automaton. This is the type you use to execute searches.
|
||||||
|
* [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) can be used to build
|
||||||
|
an Aho-Corasick automaton, and supports configuring a number of options.
|
||||||
|
* [`Match`](struct.Match.html) represents a single match reported by an
|
||||||
|
Aho-Corasick automaton. Each match has two pieces of information: the pattern
|
||||||
|
that matched and the start and end byte offsets corresponding to the position
|
||||||
|
in the haystack at which it matched.
|
||||||
|
|
||||||
|
Additionally, the [`packed`](packed/index.html) sub-module contains a lower
|
||||||
|
level API for using fast vectorized routines for finding a small number of
|
||||||
|
patterns in a haystack.
|
||||||
|
|
||||||
|
# Example: basic searching
|
||||||
|
|
||||||
|
This example shows how to search for occurrences of multiple patterns
|
||||||
|
simultaneously. Each match includes the pattern that matched along with the
|
||||||
|
byte offsets of the match.
|
||||||
|
|
||||||
|
```
|
||||||
|
use aho_corasick::AhoCorasick;
|
||||||
|
|
||||||
|
let patterns = &["apple", "maple", "Snapple"];
|
||||||
|
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns);
|
||||||
|
let mut matches = vec![];
|
||||||
|
for mat in ac.find_iter(haystack) {
|
||||||
|
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||||
|
}
|
||||||
|
assert_eq!(matches, vec![
|
||||||
|
(1, 13, 18),
|
||||||
|
(0, 28, 33),
|
||||||
|
(2, 43, 50),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
# Example: case insensitivity
|
||||||
|
|
||||||
|
This is like the previous example, but matches `Snapple` case insensitively
|
||||||
|
using `AhoCorasickBuilder`:
|
||||||
|
|
||||||
|
```
|
||||||
|
use aho_corasick::AhoCorasickBuilder;
|
||||||
|
|
||||||
|
let patterns = &["apple", "maple", "snapple"];
|
||||||
|
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||||
|
|
||||||
|
let ac = AhoCorasickBuilder::new()
|
||||||
|
.ascii_case_insensitive(true)
|
||||||
|
.build(patterns);
|
||||||
|
let mut matches = vec![];
|
||||||
|
for mat in ac.find_iter(haystack) {
|
||||||
|
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||||
|
}
|
||||||
|
assert_eq!(matches, vec![
|
||||||
|
(1, 13, 18),
|
||||||
|
(0, 28, 33),
|
||||||
|
(2, 43, 50),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
# Example: replacing matches in a stream
|
||||||
|
|
||||||
|
This example shows how to execute a search and replace on a stream without
|
||||||
|
loading the entire stream into memory first.
|
||||||
|
|
||||||
|
```
|
||||||
|
use aho_corasick::AhoCorasick;
|
||||||
|
|
||||||
|
# fn example() -> Result<(), ::std::io::Error> {
|
||||||
|
let patterns = &["fox", "brown", "quick"];
|
||||||
|
let replace_with = &["sloth", "grey", "slow"];
|
||||||
|
|
||||||
|
// In a real example, these might be `std::fs::File`s instead. All you need to
|
||||||
|
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
|
||||||
|
let rdr = "The quick brown fox.";
|
||||||
|
let mut wtr = vec![];
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns);
|
||||||
|
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
|
||||||
|
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
|
||||||
|
# Ok(()) }; example().unwrap()
|
||||||
|
```
|
||||||
|
|
||||||
|
# Example: finding the leftmost first match
|
||||||
|
|
||||||
|
In the textbook description of Aho-Corasick, its formulation is typically
|
||||||
|
structured such that it reports all possible matches, even when they overlap
|
||||||
|
with another. In many cases, overlapping matches may not be desired, such as
|
||||||
|
the case of finding all successive non-overlapping matches like you might with
|
||||||
|
a standard regular expression.
|
||||||
|
|
||||||
|
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
|
||||||
|
this doesn't always work in the expected way, since it will report matches as
|
||||||
|
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
|
||||||
|
against the text `Samwise`. Most regex engines (that are Perl-like, or
|
||||||
|
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
|
||||||
|
algorithm modified for reporting non-overlapping matches will report `Sam`.
|
||||||
|
|
||||||
|
A novel contribution of this library is the ability to change the match
|
||||||
|
semantics of Aho-Corasick (without additional search time overhead) such that
|
||||||
|
`Samwise` is reported instead. For example, here's the standard approach:
|
||||||
|
|
||||||
|
```
|
||||||
|
use aho_corasick::AhoCorasick;
|
||||||
|
|
||||||
|
let patterns = &["Samwise", "Sam"];
|
||||||
|
let haystack = "Samwise";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns);
|
||||||
|
let mat = ac.find(haystack).expect("should have a match");
|
||||||
|
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
|
||||||
|
```
|
||||||
|
|
||||||
|
And now here's the leftmost-first version, which matches how a Perl-like
|
||||||
|
regex will work:
|
||||||
|
|
||||||
|
```
|
||||||
|
use aho_corasick::{AhoCorasickBuilder, MatchKind};
|
||||||
|
|
||||||
|
let patterns = &["Samwise", "Sam"];
|
||||||
|
let haystack = "Samwise";
|
||||||
|
|
||||||
|
let ac = AhoCorasickBuilder::new()
|
||||||
|
.match_kind(MatchKind::LeftmostFirst)
|
||||||
|
.build(patterns);
|
||||||
|
let mat = ac.find(haystack).expect("should have a match");
|
||||||
|
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
|
||||||
|
```
|
||||||
|
|
||||||
|
In addition to leftmost-first semantics, this library also supports
|
||||||
|
leftmost-longest semantics, which match the POSIX behavior of a regular
|
||||||
|
expression alternation. See
|
||||||
|
[`MatchKind`](enum.MatchKind.html)
|
||||||
|
for more details.
|
||||||
|
|
||||||
|
# Prefilters
|
||||||
|
|
||||||
|
While an Aho-Corasick automaton can perform admirably when compared to more
|
||||||
|
naive solutions, it is generally slower than more specialized algorithms that
|
||||||
|
are accelerated using vector instructions such as SIMD.
|
||||||
|
|
||||||
|
For that reason, this library will internally use a "prefilter" to attempt
|
||||||
|
to accelerate searches when possible. Currently, this library has several
|
||||||
|
different algorithms it might use depending on the patterns provided. Once the
|
||||||
|
number of patterns gets too big, prefilters are no longer used.
|
||||||
|
|
||||||
|
While a prefilter is generally good to have on by default since it works
|
||||||
|
well in the common case, it can lead to less predictable or even sub-optimal
|
||||||
|
performance in some cases. For that reason, prefilters can be explicitly
|
||||||
|
disabled via
|
||||||
|
[`AhoCorasickBuilder::prefilter`](struct.AhoCorasickBuilder.html#method.prefilter).
|
||||||
|
*/
|
||||||
|
|
||||||
|
#![deny(missing_docs)]
|
||||||
|
|
||||||
|
// We can never be truly no_std, but we could be alloc-only some day, so
|
||||||
|
// require the std feature for now.
|
||||||
|
#[cfg(not(feature = "std"))]
|
||||||
|
compile_error!("`std` feature is currently required to build this crate");
|
||||||
|
|
||||||
|
extern crate memchr;
|
||||||
|
// #[cfg(doctest)]
|
||||||
|
// #[macro_use]
|
||||||
|
// extern crate doc_comment;
|
||||||
|
|
||||||
|
// #[cfg(doctest)]
|
||||||
|
// doctest!("../README.md");
|
||||||
|
|
||||||
|
pub use ahocorasick::{
|
||||||
|
AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind,
|
||||||
|
StreamFindIter,
|
||||||
|
};
|
||||||
|
pub use error::{Error, ErrorKind};
|
||||||
|
pub use state_id::StateID;
|
||||||
|
|
||||||
|
mod ahocorasick;
|
||||||
|
mod automaton;
|
||||||
|
mod buffer;
|
||||||
|
mod byte_frequencies;
|
||||||
|
mod classes;
|
||||||
|
mod dfa;
|
||||||
|
mod error;
|
||||||
|
mod nfa;
|
||||||
|
pub mod packed;
|
||||||
|
mod prefilter;
|
||||||
|
mod state_id;
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests;
|
||||||
|
|
||||||
|
/// A representation of a match reported by an Aho-Corasick automaton.
|
||||||
|
///
|
||||||
|
/// A match has two essential pieces of information: the identifier of the
|
||||||
|
/// pattern that matched, along with the start and end offsets of the match
|
||||||
|
/// in the haystack.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// Basic usage:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::AhoCorasick;
|
||||||
|
///
|
||||||
|
/// let ac = AhoCorasick::new(&[
|
||||||
|
/// "foo", "bar", "baz",
|
||||||
|
/// ]);
|
||||||
|
/// let mat = ac.find("xxx bar xxx").expect("should have a match");
|
||||||
|
/// assert_eq!(1, mat.pattern());
|
||||||
|
/// assert_eq!(4, mat.start());
|
||||||
|
/// assert_eq!(7, mat.end());
|
||||||
|
/// ```
|
||||||
|
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
|
||||||
|
pub struct Match {
|
||||||
|
/// The pattern id.
|
||||||
|
pattern: usize,
|
||||||
|
/// The length of this match, such that the starting position of the match
|
||||||
|
/// is `end - len`.
|
||||||
|
///
|
||||||
|
/// We use length here because, other than the pattern id, the only
|
||||||
|
/// information about each pattern that the automaton stores is its length.
|
||||||
|
/// So using the length here is just a bit more natural. But it isn't
|
||||||
|
/// technically required.
|
||||||
|
len: usize,
|
||||||
|
/// The end offset of the match, exclusive.
|
||||||
|
end: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Match {
|
||||||
|
/// Returns the identifier of the pattern that matched.
|
||||||
|
///
|
||||||
|
/// The identifier of a pattern is derived from the position in which it
|
||||||
|
/// was originally inserted into the corresponding automaton. The first
|
||||||
|
/// pattern has identifier `0`, and each subsequent pattern is `1`, `2`
|
||||||
|
/// and so on.
|
||||||
|
#[inline]
|
||||||
|
pub fn pattern(&self) -> usize {
|
||||||
|
self.pattern
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The starting position of the match.
|
||||||
|
#[inline]
|
||||||
|
pub fn start(&self) -> usize {
|
||||||
|
self.end - self.len
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The ending position of the match.
|
||||||
|
#[inline]
|
||||||
|
pub fn end(&self) -> usize {
|
||||||
|
self.end
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if this match is empty. That is, when
|
||||||
|
/// `start() == end()`.
|
||||||
|
///
|
||||||
|
/// An empty match can only be returned when the empty string was among
|
||||||
|
/// the patterns used to build the Aho-Corasick automaton.
|
||||||
|
#[inline]
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.len == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn increment(&self, by: usize) -> Match {
|
||||||
|
Match { pattern: self.pattern, len: self.len, end: self.end + by }
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn from_span(id: usize, start: usize, end: usize) -> Match {
|
||||||
|
Match { pattern: id, len: end - start, end }
|
||||||
|
}
|
||||||
|
}
|
|
@ -858,10 +858,17 @@ impl<'a, S: StateID> Compiler<'a, S> {
|
||||||
while let Some(id) = queue.pop_front() {
|
while let Some(id) = queue.pop_front() {
|
||||||
let mut it = self.nfa.iter_transitions_mut(id);
|
let mut it = self.nfa.iter_transitions_mut(id);
|
||||||
while let Some((b, next)) = it.next() {
|
while let Some((b, next)) = it.next() {
|
||||||
if !seen.contains(next) {
|
if seen.contains(next) {
|
||||||
queue.push_back(next);
|
// The only way to visit a duplicate state in a transition
|
||||||
seen.insert(next);
|
// list is when ASCII case insensitivity is enabled. In
|
||||||
|
// this case, we want to skip it since it's redundant work.
|
||||||
|
// But it would also end up duplicating matches, which
|
||||||
|
// results in reporting duplicate matches in some cases.
|
||||||
|
// See the 'acasei010' regression test.
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
queue.push_back(next);
|
||||||
|
seen.insert(next);
|
||||||
|
|
||||||
let mut fail = it.nfa().state(id).fail;
|
let mut fail = it.nfa().state(id).fail;
|
||||||
while it.nfa().state(fail).next_state(b) == fail_id() {
|
while it.nfa().state(fail).next_state(b) == fail_id() {
|
||||||
|
@ -1012,10 +1019,17 @@ impl<'a, S: StateID> Compiler<'a, S> {
|
||||||
|
|
||||||
// Queue up the next state.
|
// Queue up the next state.
|
||||||
let next = item.next_queued_state(it.nfa(), next_id);
|
let next = item.next_queued_state(it.nfa(), next_id);
|
||||||
if !seen.contains(next.id) {
|
if seen.contains(next.id) {
|
||||||
queue.push_back(next);
|
// The only way to visit a duplicate state in a transition
|
||||||
seen.insert(next.id);
|
// list is when ASCII case insensitivity is enabled. In
|
||||||
|
// this case, we want to skip it since it's redundant work.
|
||||||
|
// But it would also end up duplicating matches, which
|
||||||
|
// results in reporting duplicate matches in some cases.
|
||||||
|
// See the 'acasei010' regression test.
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
queue.push_back(next);
|
||||||
|
seen.insert(next.id);
|
||||||
|
|
||||||
// Find the failure state for next. Same as standard.
|
// Find the failure state for next. Same as standard.
|
||||||
let mut fail = it.nfa().state(item.id).fail;
|
let mut fail = it.nfa().state(item.id).fail;
|
|
@ -80,6 +80,17 @@ pub trait Prefilter:
|
||||||
fn reports_false_positives(&self) -> bool {
|
fn reports_false_positives(&self) -> bool {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if this prefilter may look for a non-starting
|
||||||
|
/// position of a match.
|
||||||
|
///
|
||||||
|
/// This is useful in a streaming context where prefilters that don't look
|
||||||
|
/// for a starting position of a match can be quite difficult to deal with.
|
||||||
|
///
|
||||||
|
/// This returns false by default.
|
||||||
|
fn looks_for_non_start_of_match(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, P: Prefilter + ?Sized> Prefilter for &'a P {
|
impl<'a, P: Prefilter + ?Sized> Prefilter for &'a P {
|
||||||
|
@ -191,6 +202,17 @@ impl PrefilterState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create a prefilter state that always disables the prefilter.
|
||||||
|
pub fn disabled() -> PrefilterState {
|
||||||
|
PrefilterState {
|
||||||
|
skips: 0,
|
||||||
|
skipped: 0,
|
||||||
|
max_match_len: 0,
|
||||||
|
inert: true,
|
||||||
|
last_scan_at: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Update this state with the number of bytes skipped on the last
|
/// Update this state with the number of bytes skipped on the last
|
||||||
/// invocation of the prefilter.
|
/// invocation of the prefilter.
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -285,6 +307,7 @@ impl Builder {
|
||||||
/// All patterns added to an Aho-Corasick automaton should be added to this
|
/// All patterns added to an Aho-Corasick automaton should be added to this
|
||||||
/// builder before attempting to construct the prefilter.
|
/// builder before attempting to construct the prefilter.
|
||||||
pub fn build(&self) -> Option<PrefilterObj> {
|
pub fn build(&self) -> Option<PrefilterObj> {
|
||||||
|
// match (self.start_bytes.build(), self.rare_bytes.build()) {
|
||||||
match (self.start_bytes.build(), self.rare_bytes.build()) {
|
match (self.start_bytes.build(), self.rare_bytes.build()) {
|
||||||
// If we could build both start and rare prefilters, then there are
|
// If we could build both start and rare prefilters, then there are
|
||||||
// a few cases in which we'd want to use the start-byte prefilter
|
// a few cases in which we'd want to use the start-byte prefilter
|
||||||
|
@ -663,6 +686,33 @@ impl Prefilter for RareBytesOne {
|
||||||
fn heap_bytes(&self) -> usize {
|
fn heap_bytes(&self) -> usize {
|
||||||
0
|
0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn looks_for_non_start_of_match(&self) -> bool {
|
||||||
|
// TODO: It should be possible to use a rare byte prefilter in a
|
||||||
|
// streaming context. The main problem is that we usually assume that
|
||||||
|
// if a prefilter has scanned some text and not found anything, then no
|
||||||
|
// match *starts* in that text. This doesn't matter in non-streaming
|
||||||
|
// contexts, but in a streaming context, if we're looking for a byte
|
||||||
|
// that doesn't start at the beginning of a match and don't find it,
|
||||||
|
// then it's still possible for a match to start at the end of the
|
||||||
|
// current buffer content. In order to fix this, the streaming searcher
|
||||||
|
// would need to become aware of prefilters that do this and use the
|
||||||
|
// appropriate offset in various places. It is quite a delicate change
|
||||||
|
// and probably shouldn't be attempted until streaming search has a
|
||||||
|
// better testing strategy. In particular, we'd really like to be able
|
||||||
|
// to vary the buffer size to force strange cases that occur at the
|
||||||
|
// edge of the buffer. If we make the buffer size minimal, then these
|
||||||
|
// cases occur more frequently and easier.
|
||||||
|
//
|
||||||
|
// This is also a bummer because this means that if the prefilter
|
||||||
|
// builder chose a rare byte prefilter, then a streaming search won't
|
||||||
|
// use any prefilter at all because the builder doesn't know how it's
|
||||||
|
// going to be used. Assuming we don't make streaming search aware of
|
||||||
|
// these special types of prefilters as described above, we could fix
|
||||||
|
// this by building a "backup" prefilter that could be used when the
|
||||||
|
// rare byte prefilter could not. But that's a bandaide. Sigh.
|
||||||
|
true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A prefilter for scanning for two "rare" bytes.
|
/// A prefilter for scanning for two "rare" bytes.
|
||||||
|
@ -697,6 +747,11 @@ impl Prefilter for RareBytesTwo {
|
||||||
fn heap_bytes(&self) -> usize {
|
fn heap_bytes(&self) -> usize {
|
||||||
0
|
0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn looks_for_non_start_of_match(&self) -> bool {
|
||||||
|
// TODO: See Prefilter impl for RareBytesOne.
|
||||||
|
true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A prefilter for scanning for three "rare" bytes.
|
/// A prefilter for scanning for three "rare" bytes.
|
||||||
|
@ -732,6 +787,11 @@ impl Prefilter for RareBytesThree {
|
||||||
fn heap_bytes(&self) -> usize {
|
fn heap_bytes(&self) -> usize {
|
||||||
0
|
0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn looks_for_non_start_of_match(&self) -> bool {
|
||||||
|
// TODO: See Prefilter impl for RareBytesOne.
|
||||||
|
true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A builder for constructing a starting byte prefilter.
|
/// A builder for constructing a starting byte prefilter.
|
File diff suppressed because it is too large
Load Diff
|
@ -1 +0,0 @@
|
||||||
{"files":{"CHANGELOG.md":"4d03cdc2987a1fa1b86a2de5fa57714d83cbb9d3d3f400eadecd8e8a0a857621","Cargo.toml":"a9ec8b36707f907971b410719b85e9594cb96c9e4bca6f831e2cc78ba22c71da","LICENSE":"0dd39f89842df915b8ded7ac59e8a1372cf5be36133818866cca2ef3af1a2849","README.md":"132383b73044b1e91acb9e5d69afeb8f14239cfe712aca59152bfe0c420f7a33","examples/test.rs":"4e9e73dfe80573296e93f66c2c03681908c278a758dceb4913ecb65d20e9ed86","src/lib.rs":"7a0e852a4bbfbf72c7702527d7c6f7f8c717fca77bfd4b3e78ba7f6cebed4e6f","src/line.rs":"edbdc54503342733f8aa7a4aa72a7cb08d376d53ca2b85e00a77dd42bf04bb22","src/shapes/mod.rs":"071d6ea4080dc8f1e4299258d65c32bccc40e9eb6933f3b3600576d58e7917ae","src/shapes/rectangle.rs":"ad545b9d4a628b3a515deb9b087f881b253d3f3a16a60734da82896d51c93cc9","src/text/fontconfig.rs":"c673bfcf5df387479dd2027a733d8de85461731b448202f49a9f2d1bce54f465","src/text/mod.rs":"4afd25c6297d55cd5a3956e5ae6d3921403b306533a237fe2e5eab33e65a91ee"},"package":"9b7f09f89872c2b6b29e319377b1fbe91c6f5947df19a25596e121cf19a7b35e"}
|
|
|
@ -1,61 +0,0 @@
|
||||||
"""
|
|
||||||
@generated
|
|
||||||
cargo-raze crate build file.
|
|
||||||
|
|
||||||
DO NOT EDIT! Replaced on runs of cargo-raze
|
|
||||||
"""
|
|
||||||
|
|
||||||
# buildifier: disable=load
|
|
||||||
load(
|
|
||||||
"@io_bazel_rules_rust//rust:rust.bzl",
|
|
||||||
"rust_binary",
|
|
||||||
"rust_library",
|
|
||||||
"rust_test",
|
|
||||||
)
|
|
||||||
|
|
||||||
# buildifier: disable=load
|
|
||||||
load("@bazel_skylib//lib:selects.bzl", "selects")
|
|
||||||
|
|
||||||
package(default_visibility = [
|
|
||||||
# Public for visibility by "@raze__crate__version//" targets.
|
|
||||||
#
|
|
||||||
# Prefer access through "//third_party/cargo", which limits external
|
|
||||||
# visibility to explicit Cargo.toml dependencies.
|
|
||||||
"//visibility:public",
|
|
||||||
])
|
|
||||||
|
|
||||||
licenses([
|
|
||||||
"notice", # MIT from expression "MIT"
|
|
||||||
])
|
|
||||||
|
|
||||||
# Generated Targets
|
|
||||||
|
|
||||||
# Unsupported target "test" with type "example" omitted
|
|
||||||
|
|
||||||
rust_library(
|
|
||||||
name = "andrew",
|
|
||||||
srcs = glob(["**/*.rs"]),
|
|
||||||
crate_features = [
|
|
||||||
],
|
|
||||||
crate_root = "src/lib.rs",
|
|
||||||
crate_type = "lib",
|
|
||||||
data = [],
|
|
||||||
edition = "2015",
|
|
||||||
rustc_flags = [
|
|
||||||
"--cap-lints=allow",
|
|
||||||
],
|
|
||||||
tags = [
|
|
||||||
"cargo-raze",
|
|
||||||
"manual",
|
|
||||||
],
|
|
||||||
version = "0.2.1",
|
|
||||||
# buildifier: leave-alone
|
|
||||||
deps = [
|
|
||||||
"//third_party/cargo/vendor/bitflags-1.2.1:bitflags",
|
|
||||||
"//third_party/cargo/vendor/line_drawing-0.7.0:line_drawing",
|
|
||||||
"//third_party/cargo/vendor/walkdir-2.3.1:walkdir",
|
|
||||||
"//third_party/cargo/vendor/xdg-2.2.0:xdg",
|
|
||||||
"//third_party/cargo/vendor/xml-rs-0.8.3:xml_rs",
|
|
||||||
"//third_party/cargo/vendor/rusttype-0.8.3:rusttype",
|
|
||||||
],
|
|
||||||
)
|
|
|
@ -1,47 +0,0 @@
|
||||||
# Change Log
|
|
||||||
|
|
||||||
## Unreleased
|
|
||||||
|
|
||||||
## 0.2.1 -- 2019-03-29
|
|
||||||
|
|
||||||
- Fix `get_width()` for texts that start and end with spaces
|
|
||||||
|
|
||||||
## 0.2.0 -- 2019-01-26
|
|
||||||
|
|
||||||
- **[Breaking]** Canvas is now endian aware and will draw to the buffer in the endianness of the `Endian` its created with
|
|
||||||
|
|
||||||
## 0.1.6 -- 2019-01-24
|
|
||||||
|
|
||||||
- Faster drawing of horizontal and verticle lines by precomputing line boundaries
|
|
||||||
- Only calculate alpha overlay when drawing colors without a non-max alpha value for performance
|
|
||||||
|
|
||||||
## 0.1.5 -- 2019-01-13
|
|
||||||
|
|
||||||
- Fix drawing of characters with negative bounding boxes
|
|
||||||
- Fix error in `get_width()` for text without any characters
|
|
||||||
|
|
||||||
## 0.1.4 -- 2018-11-10
|
|
||||||
|
|
||||||
- Remove rusttype version restriction
|
|
||||||
|
|
||||||
## 0.1.3 -- 2018-10-09
|
|
||||||
|
|
||||||
- Move from `quick-xml` to `xml-rs` dependency
|
|
||||||
|
|
||||||
## 0.1.2 -- 2018-10-04
|
|
||||||
|
|
||||||
- Add basic/experimental support for fontconfig in `andrew::text::fontconfig`
|
|
||||||
|
|
||||||
## 0.1.1 -- 2018-09-17
|
|
||||||
|
|
||||||
- Manage dependencies to maintain rust 1.22 compatibility
|
|
||||||
- Update rusttype to 0.7.1
|
|
||||||
|
|
||||||
## 0.1.0 -- 2018-08-17
|
|
||||||
|
|
||||||
Initial version, including:
|
|
||||||
|
|
||||||
- canvas
|
|
||||||
- lines
|
|
||||||
- rectangles
|
|
||||||
- text
|
|
|
@ -1,41 +0,0 @@
|
||||||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
|
||||||
#
|
|
||||||
# When uploading crates to the registry Cargo will automatically
|
|
||||||
# "normalize" Cargo.toml files for maximal compatibility
|
|
||||||
# with all versions of Cargo and also rewrite `path` dependencies
|
|
||||||
# to registry (e.g. crates.io) dependencies
|
|
||||||
#
|
|
||||||
# If you believe there's an error in this file please file an
|
|
||||||
# issue against the rust-lang/cargo repository. If you're
|
|
||||||
# editing this file be aware that the upstream Cargo.toml
|
|
||||||
# will likely look very different (and much more reasonable)
|
|
||||||
|
|
||||||
[package]
|
|
||||||
name = "andrew"
|
|
||||||
version = "0.2.1"
|
|
||||||
authors = ["Lucas Timmins <timmins.s.lucas@gmail.com>"]
|
|
||||||
description = "The andrew crate provides convenient drawing of objects such as shapes, lines and text to buffers"
|
|
||||||
readme = "README.md"
|
|
||||||
keywords = ["draw", "buffer", "shapes", "lines", "pixels"]
|
|
||||||
categories = ["rendering", "multimedia::images"]
|
|
||||||
license = "MIT"
|
|
||||||
repository = "https://github.com/trimental/andrew"
|
|
||||||
[dependencies.bitflags]
|
|
||||||
version = "1.0.3"
|
|
||||||
|
|
||||||
[dependencies.line_drawing]
|
|
||||||
version = "0.7.0"
|
|
||||||
|
|
||||||
[dependencies.rusttype]
|
|
||||||
version = "0.7.1"
|
|
||||||
|
|
||||||
[dependencies.walkdir]
|
|
||||||
version = "2.0"
|
|
||||||
|
|
||||||
[dependencies.xdg]
|
|
||||||
version = "2.1.0"
|
|
||||||
|
|
||||||
[dependencies.xml-rs]
|
|
||||||
version = "0.8.0"
|
|
||||||
[dev-dependencies.smithay-client-toolkit]
|
|
||||||
version = "0.4.0"
|
|
|
@ -1,3 +0,0 @@
|
||||||
# Andrew
|
|
||||||
|
|
||||||
This crate provides convenient drawing of objects such as shapes, lines and text to buffers
|
|
|
@ -1,210 +0,0 @@
|
||||||
extern crate andrew;
|
|
||||||
extern crate smithay_client_toolkit as sctk;
|
|
||||||
|
|
||||||
use std::io::{Read, Seek, SeekFrom, Write};
|
|
||||||
use std::sync::{Arc, Mutex};
|
|
||||||
use std::time::{Duration, Instant};
|
|
||||||
|
|
||||||
use sctk::utils::{DoubleMemPool, MemPool};
|
|
||||||
use sctk::window::{ConceptFrame, Event as WEvent, Window};
|
|
||||||
use sctk::Environment;
|
|
||||||
|
|
||||||
use sctk::reexports::client::protocol::wl_compositor::RequestsTrait as CompositorRequests;
|
|
||||||
use sctk::reexports::client::protocol::wl_surface::RequestsTrait as SurfaceRequests;
|
|
||||||
use sctk::reexports::client::protocol::{wl_shm, wl_surface};
|
|
||||||
use sctk::reexports::client::{Display, Proxy};
|
|
||||||
|
|
||||||
use andrew::shapes::rectangle;
|
|
||||||
use andrew::text;
|
|
||||||
use andrew::text::fontconfig;
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
let (display, mut event_queue) =
|
|
||||||
Display::connect_to_env().expect("Failed to connect to the wayland server.");
|
|
||||||
let env = Environment::from_display(&*display, &mut event_queue).unwrap();
|
|
||||||
|
|
||||||
let seat = env
|
|
||||||
.manager
|
|
||||||
.instantiate_auto(|seat| seat.implement(|_, _| {}, ()))
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let mut dimensions = (600, 400);
|
|
||||||
let surface = env
|
|
||||||
.compositor
|
|
||||||
.create_surface(|surface| surface.implement(|_, _| {}, ()))
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let next_action = Arc::new(Mutex::new(None::<WEvent>));
|
|
||||||
|
|
||||||
let waction = next_action.clone();
|
|
||||||
let mut window = Window::<ConceptFrame>::init_from_env(&env, surface, dimensions, move |evt| {
|
|
||||||
let mut next_action = waction.lock().unwrap();
|
|
||||||
// Keep last event in priority order : Close > Configure > Refresh
|
|
||||||
let replace = match (&evt, &*next_action) {
|
|
||||||
(_, &None)
|
|
||||||
| (_, &Some(WEvent::Refresh))
|
|
||||||
| (&WEvent::Configure { .. }, &Some(WEvent::Configure { .. }))
|
|
||||||
| (&WEvent::Close, _) => true,
|
|
||||||
_ => false,
|
|
||||||
};
|
|
||||||
if replace {
|
|
||||||
*next_action = Some(evt);
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.expect("Failed to create a window !");
|
|
||||||
|
|
||||||
window.new_seat(&seat);
|
|
||||||
|
|
||||||
let mut pools = DoubleMemPool::new(&env.shm, || {}).expect("Failed to create a memory pool !");
|
|
||||||
|
|
||||||
let mut font_data = Vec::new();
|
|
||||||
::std::fs::File::open(
|
|
||||||
&fontconfig::FontConfig::new()
|
|
||||||
.unwrap()
|
|
||||||
.get_regular_family_fonts("sans")
|
|
||||||
.unwrap()[0],
|
|
||||||
)
|
|
||||||
.unwrap()
|
|
||||||
.read_to_end(&mut font_data)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
if !env.shell.needs_configure() {
|
|
||||||
if let Some(pool) = pools.pool() {
|
|
||||||
redraw(pool, window.surface(), dimensions, &font_data);
|
|
||||||
}
|
|
||||||
window.refresh();
|
|
||||||
}
|
|
||||||
|
|
||||||
loop {
|
|
||||||
match next_action.lock().unwrap().take() {
|
|
||||||
Some(WEvent::Close) => break,
|
|
||||||
Some(WEvent::Refresh) => {
|
|
||||||
window.refresh();
|
|
||||||
window.surface().commit();
|
|
||||||
}
|
|
||||||
Some(WEvent::Configure { new_size, .. }) => {
|
|
||||||
if let Some((w, h)) = new_size {
|
|
||||||
window.resize(w, h);
|
|
||||||
dimensions = (w, h)
|
|
||||||
}
|
|
||||||
window.refresh();
|
|
||||||
if let Some(pool) = pools.pool() {
|
|
||||||
redraw(pool, window.surface(), dimensions, &font_data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => {}
|
|
||||||
}
|
|
||||||
|
|
||||||
display.flush().unwrap();
|
|
||||||
event_queue.dispatch().unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn redraw(
|
|
||||||
pool: &mut MemPool,
|
|
||||||
surface: &Proxy<wl_surface::WlSurface>,
|
|
||||||
dimensions: (u32, u32),
|
|
||||||
font_data: &[u8],
|
|
||||||
) {
|
|
||||||
let (buf_x, buf_y) = (dimensions.0 as usize, dimensions.1 as usize);
|
|
||||||
|
|
||||||
pool.resize(4 * buf_x * buf_y)
|
|
||||||
.expect("Failed to resize the memory pool.");
|
|
||||||
|
|
||||||
let mut buf: Vec<u8> = vec![255; 4 * buf_x * buf_y];
|
|
||||||
let mut canvas =
|
|
||||||
andrew::Canvas::new(&mut buf, buf_x, buf_y, 4 * buf_x, andrew::Endian::native());
|
|
||||||
|
|
||||||
println!("______________");
|
|
||||||
let mut total_dur = Duration::new(0, 0);
|
|
||||||
|
|
||||||
// Draw background
|
|
||||||
let (block_w, block_h) = (buf_x / 20, buf_y / 20);
|
|
||||||
for block_y in 0..21 {
|
|
||||||
for block_x in 0..21 {
|
|
||||||
let color = if (block_x + (block_y % 2)) % 2 == 0 {
|
|
||||||
[255, 0, 0, 0]
|
|
||||||
} else {
|
|
||||||
[255, 255, 255, 255]
|
|
||||||
};
|
|
||||||
|
|
||||||
let block = rectangle::Rectangle::new(
|
|
||||||
(block_w * block_x, block_h * block_y),
|
|
||||||
(block_w, block_h),
|
|
||||||
None,
|
|
||||||
Some(color),
|
|
||||||
);
|
|
||||||
let timer = Instant::now();
|
|
||||||
canvas.draw(&block);
|
|
||||||
total_dur += timer.elapsed()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
println!("Background draw time: {:?}", total_dur);
|
|
||||||
|
|
||||||
let rectangle = rectangle::Rectangle::new(
|
|
||||||
(buf_x / 30, buf_y / 4),
|
|
||||||
(buf_x - (buf_x / 30) * 2, buf_y - buf_y / 2),
|
|
||||||
Some((
|
|
||||||
15,
|
|
||||||
[255, 170, 20, 45],
|
|
||||||
rectangle::Sides::TOP ^ rectangle::Sides::BOTTOM,
|
|
||||||
Some(10),
|
|
||||||
)),
|
|
||||||
Some([255, 170, 20, 45]),
|
|
||||||
);
|
|
||||||
let mut timer = Instant::now();
|
|
||||||
canvas.draw(&rectangle);
|
|
||||||
println!("Rectangle draw time: {:?}", timer.elapsed());
|
|
||||||
total_dur += timer.elapsed();
|
|
||||||
|
|
||||||
let text_h = buf_x as f32 / 80.;
|
|
||||||
let text_hh = text_h / 2.;
|
|
||||||
let mut text = text::Text::new(
|
|
||||||
(63, 69),
|
|
||||||
[255, 255, 255, 255],
|
|
||||||
font_data,
|
|
||||||
text_h,
|
|
||||||
2.0,
|
|
||||||
"“Life is the art of drawing without an eraser.” - John W. Gardner",
|
|
||||||
);
|
|
||||||
text.pos = (
|
|
||||||
buf_x / 2 - text.get_width() / 2,
|
|
||||||
buf_y / 2 - text_hh as usize,
|
|
||||||
);
|
|
||||||
|
|
||||||
let text_box = rectangle::Rectangle::new(
|
|
||||||
(
|
|
||||||
buf_x / 2 - text.get_width() / 2 - 10,
|
|
||||||
buf_y / 2 - text_hh as usize - 10,
|
|
||||||
),
|
|
||||||
(text.get_width() + 20, text_h as usize + 20),
|
|
||||||
Some((3, [255, 255, 255, 255], rectangle::Sides::ALL, Some(5))),
|
|
||||||
None,
|
|
||||||
);
|
|
||||||
|
|
||||||
timer = Instant::now();
|
|
||||||
canvas.draw(&text_box);
|
|
||||||
println!("Text box draw time: {:?}", timer.elapsed());
|
|
||||||
total_dur += timer.elapsed();
|
|
||||||
|
|
||||||
timer = Instant::now();
|
|
||||||
canvas.draw(&text);
|
|
||||||
println!("Text draw time: {:?}", timer.elapsed());
|
|
||||||
total_dur += timer.elapsed();
|
|
||||||
|
|
||||||
println!("Total draw time: {:?}", total_dur);
|
|
||||||
|
|
||||||
pool.seek(SeekFrom::Start(0)).unwrap();
|
|
||||||
pool.write_all(canvas.buffer).unwrap();
|
|
||||||
pool.flush().unwrap();
|
|
||||||
|
|
||||||
let new_buffer = pool.buffer(
|
|
||||||
0,
|
|
||||||
buf_x as i32,
|
|
||||||
buf_y as i32,
|
|
||||||
4 * buf_x as i32,
|
|
||||||
wl_shm::Format::Argb8888,
|
|
||||||
);
|
|
||||||
surface.attach(Some(&new_buffer), 0, 0);
|
|
||||||
surface.commit();
|
|
||||||
}
|
|
|
@ -1,134 +0,0 @@
|
||||||
//! Andrew is a crate for drawing objects
|
|
||||||
#![warn(missing_docs)]
|
|
||||||
extern crate line_drawing;
|
|
||||||
extern crate rusttype;
|
|
||||||
extern crate walkdir;
|
|
||||||
extern crate xdg;
|
|
||||||
extern crate xml;
|
|
||||||
|
|
||||||
#[macro_use]
|
|
||||||
extern crate bitflags;
|
|
||||||
|
|
||||||
/// A module that contains functions and objects relating to lines
|
|
||||||
pub mod line;
|
|
||||||
/// A module that contains functions and objects relating to shapes
|
|
||||||
pub mod shapes;
|
|
||||||
/// A module that contains functions and objects relating to text
|
|
||||||
pub mod text;
|
|
||||||
|
|
||||||
/// The Drawable trait allows object to be drawn to a buffer or canvas
|
|
||||||
pub trait Drawable {
|
|
||||||
/// A function that draws the object to a canvas
|
|
||||||
fn draw(&self, canvas: &mut Canvas);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Describes an endianness (aka byte order)
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub enum Endian {
|
|
||||||
/// Little Endian
|
|
||||||
Little,
|
|
||||||
/// Big Endian
|
|
||||||
Big,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Endian {
|
|
||||||
/// Returns the native endianness
|
|
||||||
pub fn native() -> Endian {
|
|
||||||
if cfg!(target_endian = "little") {
|
|
||||||
Endian::Little
|
|
||||||
} else {
|
|
||||||
Endian::Big
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The canvas object acts as a wrapper around a buffer, providing information and functions
|
|
||||||
/// for drawing
|
|
||||||
pub struct Canvas<'a> {
|
|
||||||
/// A buffer for the canvas to draw to
|
|
||||||
pub buffer: &'a mut [u8],
|
|
||||||
/// The width in pixels of the canvas
|
|
||||||
pub width: usize,
|
|
||||||
/// The height in pixels of the canvas
|
|
||||||
pub height: usize,
|
|
||||||
/// The number of bytes between each line of pixels on the canvas
|
|
||||||
pub stride: usize,
|
|
||||||
/// The number of bytes contained in each pixel
|
|
||||||
pub pixel_size: usize,
|
|
||||||
/// The endianness of the canvas
|
|
||||||
pub endianness: Endian,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Canvas<'a> {
|
|
||||||
/// Creates a new canvas object
|
|
||||||
pub fn new(
|
|
||||||
buffer: &'a mut [u8],
|
|
||||||
width: usize,
|
|
||||||
height: usize,
|
|
||||||
stride: usize,
|
|
||||||
endianness: Endian,
|
|
||||||
) -> Canvas<'a> {
|
|
||||||
assert!(
|
|
||||||
stride % width == 0,
|
|
||||||
"Incorrect Dimensions - Stride is not a multiple of width"
|
|
||||||
);
|
|
||||||
assert!(buffer.len() == stride * height);
|
|
||||||
let pixel_size = stride / width;
|
|
||||||
Canvas {
|
|
||||||
buffer,
|
|
||||||
width,
|
|
||||||
height,
|
|
||||||
stride,
|
|
||||||
pixel_size,
|
|
||||||
endianness,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Draws an object that implements the Drawable trait to the buffer
|
|
||||||
pub fn draw<D: Drawable>(&mut self, drawable: &D) {
|
|
||||||
drawable.draw(self);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Draws a pixel at the x and y coordinate
|
|
||||||
pub fn draw_point(&mut self, x: usize, y: usize, color: [u8; 4]) {
|
|
||||||
let base = self.stride * y + self.pixel_size * x;
|
|
||||||
if self.endianness == Endian::Little {
|
|
||||||
if color[0] == 255 {
|
|
||||||
self.buffer[base + 3] = color[0];
|
|
||||||
self.buffer[base + 2] = color[1];
|
|
||||||
self.buffer[base + 1] = color[2];
|
|
||||||
self.buffer[base] = color[3];
|
|
||||||
} else {
|
|
||||||
for c in 0..3 {
|
|
||||||
let alpha = f32::from(color[0]) / 255.0;
|
|
||||||
let color_diff =
|
|
||||||
(color[3 - c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
|
|
||||||
let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
|
|
||||||
self.buffer[base + c] = new_color as u8;
|
|
||||||
}
|
|
||||||
self.buffer[base + 3] = 255 as u8;
|
|
||||||
}
|
|
||||||
} else if color[0] == 255 {
|
|
||||||
self.buffer[base] = color[0];
|
|
||||||
self.buffer[base + 1] = color[1];
|
|
||||||
self.buffer[base + 2] = color[2];
|
|
||||||
self.buffer[base + 3] = color[3];
|
|
||||||
} else {
|
|
||||||
for c in 1..4 {
|
|
||||||
let alpha = f32::from(color[0]) / 255.0;
|
|
||||||
let color_diff =
|
|
||||||
(color[c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
|
|
||||||
let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
|
|
||||||
self.buffer[base + c] = new_color as u8;
|
|
||||||
}
|
|
||||||
self.buffer[base] = 255 as u8;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Clears the entire canvas buffer by zeroing it
|
|
||||||
pub fn clear(&mut self) {
|
|
||||||
for i in 0..self.width * self.height * 4 {
|
|
||||||
self.buffer[i] = 0x00;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,84 +0,0 @@
|
||||||
use std::cmp::min;
|
|
||||||
|
|
||||||
use line_drawing::Bresenham;
|
|
||||||
use line_drawing::XiaolinWu;
|
|
||||||
|
|
||||||
use Canvas;
|
|
||||||
use Drawable;
|
|
||||||
|
|
||||||
/// A drawable object that represents a line
|
|
||||||
pub struct Line {
|
|
||||||
/// The first point of the line
|
|
||||||
pub pt1: (usize, usize),
|
|
||||||
/// The second point of the line
|
|
||||||
pub pt2: (usize, usize),
|
|
||||||
/// The color of the line
|
|
||||||
pub color: [u8; 4],
|
|
||||||
/// Decides whether the line will be antialiased
|
|
||||||
pub antialiased: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Line {
|
|
||||||
/// Creates a new Line object
|
|
||||||
pub fn new(
|
|
||||||
pt1: (usize, usize),
|
|
||||||
pt2: (usize, usize),
|
|
||||||
color: [u8; 4],
|
|
||||||
antialiased: bool,
|
|
||||||
) -> Line {
|
|
||||||
Line {
|
|
||||||
pt1,
|
|
||||||
pt2,
|
|
||||||
color,
|
|
||||||
antialiased,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drawable for Line {
|
|
||||||
fn draw(&self, canvas: &mut Canvas) {
|
|
||||||
if !self.antialiased {
|
|
||||||
if self.pt1.0 == self.pt2.0 && self.pt1.0 < canvas.width {
|
|
||||||
let (min_y, max_y) = if self.pt1.1 > self.pt2.1 {
|
|
||||||
(self.pt2.1, self.pt1.1)
|
|
||||||
} else {
|
|
||||||
(self.pt1.1, self.pt2.1)
|
|
||||||
};
|
|
||||||
for y in min_y..min(max_y, canvas.height - 1) + 1 {
|
|
||||||
canvas.draw_point(self.pt1.0, y, self.color)
|
|
||||||
}
|
|
||||||
} else if self.pt1.1 == self.pt2.1 && self.pt1.1 < canvas.height {
|
|
||||||
let (min_x, max_x) = if self.pt1.0 > self.pt2.0 {
|
|
||||||
(self.pt2.0, self.pt1.0)
|
|
||||||
} else {
|
|
||||||
(self.pt1.0, self.pt2.0)
|
|
||||||
};
|
|
||||||
for x in min_x..min(max_x, canvas.width - 1) + 1 {
|
|
||||||
canvas.draw_point(x, self.pt1.1, self.color)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Angled line without antialias
|
|
||||||
for (x, y) in Bresenham::new(
|
|
||||||
(self.pt1.0 as isize, self.pt1.1 as isize),
|
|
||||||
(self.pt2.0 as isize, self.pt2.1 as isize),
|
|
||||||
) {
|
|
||||||
if x < canvas.width as isize && y < canvas.height as isize {
|
|
||||||
canvas.draw_point(x as usize, y as usize, self.color)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Angled line with antialias
|
|
||||||
for ((x, y), coverage) in XiaolinWu::<f32, isize>::new(
|
|
||||||
(self.pt1.0 as f32, self.pt1.1 as f32),
|
|
||||||
(self.pt2.0 as f32, self.pt2.1 as f32),
|
|
||||||
) {
|
|
||||||
if x < canvas.width as isize && y < canvas.height as isize {
|
|
||||||
let mut color = self.color;
|
|
||||||
color[3] = (f32::from(color[3]) * coverage) as u8;
|
|
||||||
canvas.draw_point(x as usize, y as usize, color)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,153 +0,0 @@
|
||||||
use line::Line;
|
|
||||||
use Canvas;
|
|
||||||
use Drawable;
|
|
||||||
|
|
||||||
bitflags! {
|
|
||||||
/// The Sides bitflag presents the sides of a rectangle
|
|
||||||
pub struct Sides: u32 {
|
|
||||||
/// The top side of the rectangle
|
|
||||||
const TOP = 0b0001;
|
|
||||||
/// The bottom side of the rectangle
|
|
||||||
const BOTTOM = 0b0010;
|
|
||||||
/// The left side of the rectangle
|
|
||||||
const LEFT = 0b0100;
|
|
||||||
/// The right side of the rectangle
|
|
||||||
const RIGHT = 0b1000;
|
|
||||||
/// All sides of the rectangle
|
|
||||||
const ALL = Self::TOP.bits | Self::BOTTOM.bits | Self::LEFT.bits | Self::RIGHT.bits;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A drawable object that represents a rectangle
|
|
||||||
pub struct Rectangle {
|
|
||||||
/// Position of the top-left corner of rectangle
|
|
||||||
pub pos: (usize, usize),
|
|
||||||
/// The size of the rectangle to be drawn, the border will be contained within this size
|
|
||||||
pub size: (usize, usize),
|
|
||||||
/// The border that is drawn around the perimeter of the rectangle. It's arguments are
|
|
||||||
/// thickness of border, color of border, sides that the border is drawn around, rounding size
|
|
||||||
/// of the corners
|
|
||||||
pub border: Option<(usize, [u8; 4], Sides, Option<usize>)>,
|
|
||||||
/// The color of the fill (area) of the rectangle
|
|
||||||
pub fill: Option<[u8; 4]>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Rectangle {
|
|
||||||
/// Creates a new Rectangle object
|
|
||||||
pub fn new(
|
|
||||||
pos: (usize, usize),
|
|
||||||
size: (usize, usize),
|
|
||||||
border: Option<(usize, [u8; 4], Sides, Option<usize>)>,
|
|
||||||
fill: Option<[u8; 4]>,
|
|
||||||
) -> Rectangle {
|
|
||||||
Rectangle {
|
|
||||||
pos,
|
|
||||||
size,
|
|
||||||
border,
|
|
||||||
fill,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn draw_borders(&self, canvas: &mut Canvas) {
|
|
||||||
if let Some(border) = self.border {
|
|
||||||
for i in 0..border.0 {
|
|
||||||
let rounding_space = if let Some(round_size) = border.3 {
|
|
||||||
if i < round_size {
|
|
||||||
round_size
|
|
||||||
- ((round_size as f32).powi(2) - ((round_size - i - 1) as f32).powi(2))
|
|
||||||
.sqrt()
|
|
||||||
.round() as usize
|
|
||||||
} else {
|
|
||||||
0
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
0
|
|
||||||
};
|
|
||||||
|
|
||||||
// Top line
|
|
||||||
if border.2.contains(Sides::TOP) && canvas.width > rounding_space * 2 {
|
|
||||||
Line::new(
|
|
||||||
(self.pos.0 + rounding_space, self.pos.1 + i),
|
|
||||||
(self.pos.0 + self.size.0 - rounding_space, self.pos.1 + i),
|
|
||||||
border.1,
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
.draw(canvas);
|
|
||||||
}
|
|
||||||
// Bottom line
|
|
||||||
if border.2.contains(Sides::BOTTOM) && canvas.width > rounding_space * 2 {
|
|
||||||
Line::new(
|
|
||||||
(self.pos.0 + rounding_space, self.pos.1 + self.size.1 - i),
|
|
||||||
(
|
|
||||||
self.pos.0 + self.size.0 - rounding_space,
|
|
||||||
self.pos.1 + self.size.1 - i,
|
|
||||||
),
|
|
||||||
border.1,
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
.draw(canvas);
|
|
||||||
}
|
|
||||||
// Left line
|
|
||||||
if border.2.contains(Sides::LEFT) && canvas.height > rounding_space * 2 {
|
|
||||||
Line::new(
|
|
||||||
(self.pos.0 + i, self.pos.1 + rounding_space),
|
|
||||||
(self.pos.0 + i, self.pos.1 + self.size.1 - rounding_space),
|
|
||||||
border.1,
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
.draw(canvas);
|
|
||||||
}
|
|
||||||
// Right line
|
|
||||||
if border.2.contains(Sides::RIGHT) && canvas.height > rounding_space * 2 {
|
|
||||||
Line::new(
|
|
||||||
(self.pos.0 + self.size.0 - i, self.pos.1 + rounding_space),
|
|
||||||
(
|
|
||||||
self.pos.0 + self.size.0 - i,
|
|
||||||
self.pos.1 + self.size.1 - rounding_space,
|
|
||||||
),
|
|
||||||
border.1,
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
.draw(canvas);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn draw_area(&self, canvas: &mut Canvas) {
|
|
||||||
if let Some(fill) = self.fill {
|
|
||||||
let (area_pos, area_size) = self.measure_area();
|
|
||||||
for y in area_pos.1..area_pos.1 + area_size.1 + 1 {
|
|
||||||
Line::new((area_pos.0, y), (area_pos.0 + area_size.0, y), fill, false).draw(canvas)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn measure_area(&self) -> ((usize, usize), (usize, usize)) {
|
|
||||||
let (mut area_pos, mut area_size) = (self.pos, self.size);
|
|
||||||
if let Some(border) = self.border {
|
|
||||||
if border.2.contains(Sides::TOP) {
|
|
||||||
area_pos.1 += border.0;
|
|
||||||
area_size.1 -= border.0;
|
|
||||||
}
|
|
||||||
if border.2.contains(Sides::BOTTOM) {
|
|
||||||
area_size.1 -= border.0;
|
|
||||||
}
|
|
||||||
if border.2.contains(Sides::LEFT) {
|
|
||||||
area_pos.0 += border.0;
|
|
||||||
area_size.0 -= border.0;
|
|
||||||
}
|
|
||||||
if border.2.contains(Sides::RIGHT) {
|
|
||||||
area_size.0 -= border.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(area_pos, area_size)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drawable for Rectangle {
|
|
||||||
fn draw(&self, canvas: &mut Canvas) {
|
|
||||||
self.draw_borders(canvas);
|
|
||||||
self.draw_area(canvas);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,121 +0,0 @@
|
||||||
/// A module that contains functions and objects relating to fontconfig
|
|
||||||
pub mod fontconfig;
|
|
||||||
|
|
||||||
use rusttype::{point, Font, Scale, SharedBytes, VMetrics};
|
|
||||||
use std::fs::File;
|
|
||||||
use std::io::Read;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
use Canvas;
|
|
||||||
use Drawable;
|
|
||||||
|
|
||||||
/// A drawable object that represents text
|
|
||||||
pub struct Text<'a> {
|
|
||||||
/// The position of the text on the canvas
|
|
||||||
pub pos: (usize, usize),
|
|
||||||
/// The color of the text
|
|
||||||
pub color: [u8; 4],
|
|
||||||
/// The text that is rendered to the canvas on draw
|
|
||||||
pub text: String,
|
|
||||||
/// The font used in rendering the text
|
|
||||||
pub font: Font<'a>,
|
|
||||||
/// The scale that is applied to the text
|
|
||||||
pub scale: Scale,
|
|
||||||
/// The vertical metrics of the text
|
|
||||||
pub v_metrics: VMetrics,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Loads a font file into a `Vec<u8>`
|
|
||||||
pub fn load_font_file<P: Into<PathBuf>>(path: P) -> Vec<u8> {
|
|
||||||
let mut data: Vec<u8> = Vec::new();
|
|
||||||
let mut file = File::open(path.into()).expect("Could not open font file");
|
|
||||||
file.read_to_end(&mut data)
|
|
||||||
.expect("Could not read font file");
|
|
||||||
data
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Text<'a> {
|
|
||||||
/// Creates a new Text object
|
|
||||||
pub fn new<P: Into<SharedBytes<'a>>, T: Into<String>>(
|
|
||||||
pos: (usize, usize),
|
|
||||||
color: [u8; 4],
|
|
||||||
font_data: P,
|
|
||||||
height: f32,
|
|
||||||
width_scale: f32,
|
|
||||||
text: T,
|
|
||||||
) -> Text<'a> {
|
|
||||||
let text = text.into();
|
|
||||||
// Create font
|
|
||||||
let font = Font::from_bytes(font_data).expect("Error constructing Font");
|
|
||||||
// Create scale
|
|
||||||
let scale = Scale {
|
|
||||||
x: height * width_scale,
|
|
||||||
y: height,
|
|
||||||
};
|
|
||||||
// Create needed metrics
|
|
||||||
let v_metrics = font.v_metrics(scale);
|
|
||||||
Text {
|
|
||||||
pos,
|
|
||||||
color,
|
|
||||||
text: text.clone(),
|
|
||||||
scale,
|
|
||||||
v_metrics,
|
|
||||||
font,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn draw_text(&self, canvas: &mut Canvas) {
|
|
||||||
let glyphs: Vec<_> = self
|
|
||||||
.font
|
|
||||||
.layout(&self.text, self.scale, point(0.0, self.v_metrics.ascent))
|
|
||||||
.collect();
|
|
||||||
for glyph in glyphs {
|
|
||||||
if let Some(bounding_box) = glyph.pixel_bounding_box() {
|
|
||||||
glyph.draw(|x, y, v| {
|
|
||||||
let x = ((x as usize + self.pos.0) as i32 + bounding_box.min.x) as usize;
|
|
||||||
let y = ((y as usize + self.pos.1) as i32 + bounding_box.min.y) as usize;
|
|
||||||
|
|
||||||
if x < canvas.width && y < canvas.height {
|
|
||||||
let mut color = self.color;
|
|
||||||
color[0] = (f32::from(color[0]) * v) as u8;
|
|
||||||
canvas.draw_point(x, y, color);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Calculates the width in pixels of the text
|
|
||||||
pub fn get_width(&self) -> usize {
|
|
||||||
let glyphs: Vec<_> = self
|
|
||||||
.font
|
|
||||||
.layout(&self.text, self.scale, point(0.0, self.v_metrics.ascent))
|
|
||||||
.collect();
|
|
||||||
let min_x = glyphs
|
|
||||||
.first()
|
|
||||||
.map(|g| {
|
|
||||||
if let Some(bb) = g.pixel_bounding_box() {
|
|
||||||
bb.min.x
|
|
||||||
} else {
|
|
||||||
g.position().x as i32
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.unwrap_or(0);
|
|
||||||
let max_x = glyphs
|
|
||||||
.last()
|
|
||||||
.map(|g| {
|
|
||||||
if let Some(bb) = g.pixel_bounding_box() {
|
|
||||||
bb.max.x
|
|
||||||
} else {
|
|
||||||
(g.position().x + g.unpositioned().h_metrics().advance_width) as i32
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.unwrap_or(0);
|
|
||||||
(max_x - min_x) as usize
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Drawable for Text<'a> {
|
|
||||||
fn draw(&self, canvas: &mut Canvas) {
|
|
||||||
self.draw_text(canvas);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1 @@
|
||||||
|
{"files":{"CHANGELOG.md":"2d031d89de7918f60e866fbdb1069a8e927112e817f384990dfe50873578e7d7","Cargo.lock":"faa3eeb18c2fab92248ac415c6e4c056e443779b58ba8bea86076c4132755e47","Cargo.toml":"aa8de0b7b23e0cb281a6bbcd1652964b70129a6a8f34f5313aa54264a25b8229","LICENSE":"0dd39f89842df915b8ded7ac59e8a1372cf5be36133818866cca2ef3af1a2849","README.md":"54274cb43d69009c72c8d28647ba28bc33f639bb0e81fb4fa8ace6ffc6c66bc2","doc_index.html":"284a4836b0eef54a3d8307e490d466fa9b1b749884fb88cf097e7cbbffb75590","examples/test.rs":"dafef175db24c64037a36397dccb0ba6879e6abb08befdb56d2fb5af47f7ffe1","src/lib.rs":"821bccf3441e41c4e55a6991d47a998c712e0a2f5ca6595752b005cbb453c3f3","src/line.rs":"cb3d3ea7938a74c169ce9c5b43111fe2793936324b499767affaf7e6c6340cf9","src/shapes/mod.rs":"071d6ea4080dc8f1e4299258d65c32bccc40e9eb6933f3b3600576d58e7917ae","src/shapes/rectangle.rs":"66b92e36d3e1df73facc5d8d37db962cb2a8d68a7e66ae6be0af9eab621ed917","src/text/fontconfig.rs":"c673bfcf5df387479dd2027a733d8de85461731b448202f49a9f2d1bce54f465","src/text/mod.rs":"4ce335cac3776b9a593989c30ffdc18b53bb14e2ab70d21eeacbb3bc710ac8cf"},"package":"8c4afb09dd642feec8408e33f92f3ffc4052946f6b20f32fb99c1f58cd4fa7cf"}
|
|
@ -0,0 +1,60 @@
|
||||||
|
"""
|
||||||
|
@generated
|
||||||
|
cargo-raze crate build file.
|
||||||
|
|
||||||
|
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||||
|
"""
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load(
|
||||||
|
"@io_bazel_rules_rust//rust:rust.bzl",
|
||||||
|
"rust_binary",
|
||||||
|
"rust_library",
|
||||||
|
"rust_test",
|
||||||
|
)
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load("@bazel_skylib//lib:selects.bzl", "selects")
|
||||||
|
|
||||||
|
package(default_visibility = [
|
||||||
|
# Public for visibility by "@raze__crate__version//" targets.
|
||||||
|
#
|
||||||
|
# Prefer access through "//third_party/cargo", which limits external
|
||||||
|
# visibility to explicit Cargo.toml dependencies.
|
||||||
|
"//visibility:public",
|
||||||
|
])
|
||||||
|
|
||||||
|
licenses([
|
||||||
|
"notice", # MIT from expression "MIT"
|
||||||
|
])
|
||||||
|
|
||||||
|
# Generated Targets
|
||||||
|
|
||||||
|
# Unsupported target "test" with type "example" omitted
|
||||||
|
|
||||||
|
rust_library(
|
||||||
|
name = "andrew",
|
||||||
|
srcs = glob(["**/*.rs"]),
|
||||||
|
crate_features = [
|
||||||
|
],
|
||||||
|
crate_root = "src/lib.rs",
|
||||||
|
crate_type = "lib",
|
||||||
|
data = [],
|
||||||
|
edition = "2015",
|
||||||
|
rustc_flags = [
|
||||||
|
"--cap-lints=allow",
|
||||||
|
],
|
||||||
|
tags = [
|
||||||
|
"cargo-raze",
|
||||||
|
"manual",
|
||||||
|
],
|
||||||
|
version = "0.3.1",
|
||||||
|
# buildifier: leave-alone
|
||||||
|
deps = [
|
||||||
|
"//third_party/cargo/vendor/bitflags-1.2.1:bitflags",
|
||||||
|
"//third_party/cargo/vendor/rusttype-0.9.2:rusttype",
|
||||||
|
"//third_party/cargo/vendor/walkdir-2.3.1:walkdir",
|
||||||
|
"//third_party/cargo/vendor/xdg-2.2.0:xdg",
|
||||||
|
"//third_party/cargo/vendor/xml-rs-0.8.3:xml_rs",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,58 @@
|
||||||
|
# Change Log
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
|
## 0.3.1 -- 2020-10-23
|
||||||
|
|
||||||
|
- Speed up rectangle drawing
|
||||||
|
- Remove dependency on line_drawing
|
||||||
|
- Update sctk dev dependency to 0.12
|
||||||
|
|
||||||
|
## 0.3.0 -- 2020-05-27
|
||||||
|
|
||||||
|
- Raised MSRV to `1.41.0`.
|
||||||
|
- Upgraded dependency versions.
|
||||||
|
|
||||||
|
## 0.2.1 -- 2019-03-29
|
||||||
|
|
||||||
|
- Fix `get_width()` for texts that start and end with spaces
|
||||||
|
|
||||||
|
## 0.2.0 -- 2019-01-26
|
||||||
|
|
||||||
|
- **[Breaking]** Canvas is now endian aware and will draw to the buffer in the endianness of the `Endian` its created with
|
||||||
|
|
||||||
|
## 0.1.6 -- 2019-01-24
|
||||||
|
|
||||||
|
- Faster drawing of horizontal and verticle lines by precomputing line boundaries
|
||||||
|
- Only calculate alpha overlay when drawing colors without a non-max alpha value for performance
|
||||||
|
|
||||||
|
## 0.1.5 -- 2019-01-13
|
||||||
|
|
||||||
|
- Fix drawing of characters with negative bounding boxes
|
||||||
|
- Fix error in `get_width()` for text without any characters
|
||||||
|
|
||||||
|
## 0.1.4 -- 2018-11-10
|
||||||
|
|
||||||
|
- Remove rusttype version restriction
|
||||||
|
|
||||||
|
## 0.1.3 -- 2018-10-09
|
||||||
|
|
||||||
|
- Move from `quick-xml` to `xml-rs` dependency
|
||||||
|
|
||||||
|
## 0.1.2 -- 2018-10-04
|
||||||
|
|
||||||
|
- Add basic/experimental support for fontconfig in `andrew::text::fontconfig`
|
||||||
|
|
||||||
|
## 0.1.1 -- 2018-09-17
|
||||||
|
|
||||||
|
- Manage dependencies to maintain rust 1.22 compatibility
|
||||||
|
- Update rusttype to 0.7.1
|
||||||
|
|
||||||
|
## 0.1.0 -- 2018-08-17
|
||||||
|
|
||||||
|
Initial version, including:
|
||||||
|
|
||||||
|
- canvas
|
||||||
|
- lines
|
||||||
|
- rectangles
|
||||||
|
- text
|
|
@ -0,0 +1,419 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
[[package]]
|
||||||
|
name = "ab_glyph_rasterizer"
|
||||||
|
version = "0.1.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d9fe5e32de01730eb1f6b7f5b51c17e03e2325bf40a74f754f04f130043affff"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "andrew"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5e1ea80a5089cac999ffd4a91888154076a961d27387b0f7a6cd2d4dddb636b9"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"line_drawing",
|
||||||
|
"rusttype",
|
||||||
|
"walkdir",
|
||||||
|
"xdg",
|
||||||
|
"xml-rs",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "andrew"
|
||||||
|
version = "0.3.1"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"rusttype",
|
||||||
|
"smithay-client-toolkit",
|
||||||
|
"walkdir",
|
||||||
|
"xdg",
|
||||||
|
"xml-rs",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "autocfg"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bitflags"
|
||||||
|
version = "1.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "byteorder"
|
||||||
|
version = "1.3.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "calloop"
|
||||||
|
version = "0.6.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b036167e76041694579972c28cf4877b4f92da222560ddb49008937b6a6727c"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"nix",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cc"
|
||||||
|
version = "1.0.61"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ed67cbde08356238e75fc4656be4749481eeffb09e19f320a25237d5221c985d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "0.1.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dlib"
|
||||||
|
version = "0.4.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b11f15d1e3268f140f68d390637d5e76d849782d971ae7063e0da69fe9709a76"
|
||||||
|
dependencies = [
|
||||||
|
"libloading",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "downcast-rs"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazy_static"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.79"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libloading"
|
||||||
|
version = "0.6.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3557c9384f7f757f6d139cd3a4c62ef4e850696c16bf27924a5538c8a09717a1"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "line_drawing"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f81902e542483002b103c6424d23e765c2e5a65f732923299053a601bce50ab2"
|
||||||
|
dependencies = [
|
||||||
|
"num-traits 0.1.43",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "log"
|
||||||
|
version = "0.4.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memmap"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nix"
|
||||||
|
version = "0.18.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "83450fe6a6142ddd95fb064b746083fc4ef1705fe81f64a64e1d4b39f54a1055"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cc",
|
||||||
|
"cfg-if",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nom"
|
||||||
|
version = "5.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"version_check",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-traits"
|
||||||
|
version = "0.1.43"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31"
|
||||||
|
dependencies = [
|
||||||
|
"num-traits 0.2.12",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-traits"
|
||||||
|
version = "0.2.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "260e51e7efe62b592207e9e13a68e43692a7a279171d6ba57abd208bf23645ad"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "owned_ttf_parser"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9f923fb806c46266c02ab4a5b239735c144bdeda724a50ed058e5226f594cde3"
|
||||||
|
dependencies = [
|
||||||
|
"ttf-parser",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pkg-config"
|
||||||
|
version = "0.3.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.24"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-xid",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rusttype"
|
||||||
|
version = "0.9.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dc7c727aded0be18c5b80c1640eae0ac8e396abf6fa8477d96cb37d18ee5ec59"
|
||||||
|
dependencies = [
|
||||||
|
"ab_glyph_rasterizer",
|
||||||
|
"owned_ttf_parser",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "same-file"
|
||||||
|
version = "1.0.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-util",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "smallvec"
|
||||||
|
version = "1.4.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fbee7696b84bbf3d89a1c2eccff0850e3047ed46bfcd2e92c29a2d074d57e252"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "smithay-client-toolkit"
|
||||||
|
version = "0.12.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2ec5c077def8af49f9b5aeeb5fcf8079c638c6615c3a8f9305e2dea601de57f7"
|
||||||
|
dependencies = [
|
||||||
|
"andrew 0.3.0",
|
||||||
|
"bitflags",
|
||||||
|
"byteorder",
|
||||||
|
"calloop",
|
||||||
|
"dlib",
|
||||||
|
"lazy_static",
|
||||||
|
"log",
|
||||||
|
"memmap",
|
||||||
|
"nix",
|
||||||
|
"wayland-client",
|
||||||
|
"wayland-cursor",
|
||||||
|
"wayland-protocols",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ttf-parser"
|
||||||
|
version = "0.6.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3e5d7cd7ab3e47dda6e56542f4bbf3824c15234958c6e1bd6aaa347e93499fdc"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-xid"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.9.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "walkdir"
|
||||||
|
version = "2.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d"
|
||||||
|
dependencies = [
|
||||||
|
"same-file",
|
||||||
|
"winapi",
|
||||||
|
"winapi-util",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wayland-client"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "80c54f9b90b2c044784f91fe22c5619a8a9c681db38492f2fd78ff968cf3f184"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"downcast-rs",
|
||||||
|
"libc",
|
||||||
|
"nix",
|
||||||
|
"wayland-commons",
|
||||||
|
"wayland-scanner",
|
||||||
|
"wayland-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wayland-commons"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7602d75560fe6f02cac723609cce658042fe60541b5107999818d29d4dab7cfa"
|
||||||
|
dependencies = [
|
||||||
|
"nix",
|
||||||
|
"once_cell",
|
||||||
|
"smallvec",
|
||||||
|
"wayland-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wayland-cursor"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0446b959c5b5b4b2c11f63112fc7cbeb50ecd9f2c340d2b0ea632875685baf04"
|
||||||
|
dependencies = [
|
||||||
|
"nix",
|
||||||
|
"wayland-client",
|
||||||
|
"xcursor",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wayland-protocols"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0d419585bbdb150fb541579cff205c6095a86cd874530e41838d1f18a9569a08"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"wayland-client",
|
||||||
|
"wayland-commons",
|
||||||
|
"wayland-scanner",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wayland-scanner"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e1cc091af4b05a435312f7cefe3a26824d2017966a58362ca913f72c3d68e5e2"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"xml-rs",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wayland-sys"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e5640f53d1fe6eaaa2e77b9ff015fe9a556173ce8388607f941aecfd9b05c73e"
|
||||||
|
dependencies = [
|
||||||
|
"pkg-config",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-util"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "xcursor"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d3a481cfdefd35e1c50073ae33a8000d695c98039544659f5dc5dd71311b0d01"
|
||||||
|
dependencies = [
|
||||||
|
"nom",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "xdg"
|
||||||
|
version = "2.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d089681aa106a86fade1b0128fb5daf07d5867a509ab036d99988dec80429a57"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "xml-rs"
|
||||||
|
version = "0.8.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a"
|
|
@ -0,0 +1,38 @@
|
||||||
|
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||||
|
#
|
||||||
|
# When uploading crates to the registry Cargo will automatically
|
||||||
|
# "normalize" Cargo.toml files for maximal compatibility
|
||||||
|
# with all versions of Cargo and also rewrite `path` dependencies
|
||||||
|
# to registry (e.g., crates.io) dependencies
|
||||||
|
#
|
||||||
|
# If you believe there's an error in this file please file an
|
||||||
|
# issue against the rust-lang/cargo repository. If you're
|
||||||
|
# editing this file be aware that the upstream Cargo.toml
|
||||||
|
# will likely look very different (and much more reasonable)
|
||||||
|
|
||||||
|
[package]
|
||||||
|
name = "andrew"
|
||||||
|
version = "0.3.1"
|
||||||
|
authors = ["Lucas Timmins <timmins.s.lucas@gmail.com>"]
|
||||||
|
description = "The andrew crate provides convenient drawing of objects such as shapes, lines and text to buffers"
|
||||||
|
readme = "README.md"
|
||||||
|
keywords = ["draw", "buffer", "shapes", "lines", "pixels"]
|
||||||
|
categories = ["rendering", "multimedia::images"]
|
||||||
|
license = "MIT"
|
||||||
|
repository = "https://github.com/Smithay/andrew"
|
||||||
|
[dependencies.bitflags]
|
||||||
|
version = "1.2.1"
|
||||||
|
|
||||||
|
[dependencies.rusttype]
|
||||||
|
version = "0.9.2"
|
||||||
|
|
||||||
|
[dependencies.walkdir]
|
||||||
|
version = "2.3.1"
|
||||||
|
|
||||||
|
[dependencies.xdg]
|
||||||
|
version = "2.2.0"
|
||||||
|
|
||||||
|
[dependencies.xml-rs]
|
||||||
|
version = "0.8.3"
|
||||||
|
[dev-dependencies.smithay-client-toolkit]
|
||||||
|
version = "0.12"
|
|
@ -0,0 +1,13 @@
|
||||||
|
[![crates.io](http://meritbadge.herokuapp.com/andrew)](https://crates.io/crates/andrew)
|
||||||
|
[![Build Status](https://travis-ci.org/Smithay/andrew.svg?branch=master)](https://travis-ci.org/Smithay/andrew)
|
||||||
|
[![Docs Status](https://docs.rs/andrew/badge.svg)](https://docs.rs/andrew)
|
||||||
|
|
||||||
|
# Andrew
|
||||||
|
|
||||||
|
This crate provides convenient drawing of objects such as shapes, lines and text to buffers
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
The documentation for the master branch is [available online](https://smithay.github.io/andrew/).
|
||||||
|
|
||||||
|
The documentation for the releases can be found on [docs.rs](https://docs.rs/andrew).
|
|
@ -0,0 +1,6 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv=refresh content=0;url=andrew/index.html />
|
||||||
|
</head>
|
||||||
|
</html>
|
|
@ -0,0 +1,206 @@
|
||||||
|
extern crate andrew;
|
||||||
|
extern crate smithay_client_toolkit as sctk;
|
||||||
|
|
||||||
|
use std::io::{Read, Seek, SeekFrom, Write};
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
|
use sctk::reexports::client::protocol::{wl_seat::WlSeat, wl_shm, wl_surface};
|
||||||
|
use sctk::shm::{DoubleMemPool, MemPool};
|
||||||
|
use sctk::window::{ConceptFrame, Event as WEvent};
|
||||||
|
|
||||||
|
use andrew::shapes::rectangle;
|
||||||
|
use andrew::text;
|
||||||
|
use andrew::text::fontconfig;
|
||||||
|
|
||||||
|
sctk::default_environment!(TestExample, desktop);
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let (env, display, mut event_queue) = sctk::new_default_environment!(TestExample, desktop)
|
||||||
|
.expect("Unable to connect to a Wayland compositor");
|
||||||
|
|
||||||
|
let _seat = env.manager.instantiate_range::<WlSeat>(1, 6).unwrap();
|
||||||
|
|
||||||
|
let mut dimensions = (600, 400);
|
||||||
|
let surface = env.create_surface().detach();
|
||||||
|
let mut next_action = None::<WEvent>;
|
||||||
|
|
||||||
|
let mut window = env
|
||||||
|
.create_window::<ConceptFrame, _>(surface, None, dimensions, move |evt, mut dispatch_data| {
|
||||||
|
let next_actn = dispatch_data.get::<Option<WEvent>>().unwrap();
|
||||||
|
// Keep last event in priority order : Close > Configure > Refresh
|
||||||
|
let replace = match (&evt, &*next_actn) {
|
||||||
|
(_, &None)
|
||||||
|
| (_, &Some(WEvent::Refresh))
|
||||||
|
| (&WEvent::Configure { .. }, &Some(WEvent::Configure { .. }))
|
||||||
|
| (&WEvent::Close, _) => true,
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
|
if replace {
|
||||||
|
*next_actn = Some(evt);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.expect("Failed to create a window !");
|
||||||
|
|
||||||
|
let mut pools = DoubleMemPool::new(
|
||||||
|
env.get_global().expect("Failed to get `WlShm` global."),
|
||||||
|
|_| {},
|
||||||
|
)
|
||||||
|
.expect("Failed to create a memory pool !");
|
||||||
|
|
||||||
|
let mut font_data = Vec::new();
|
||||||
|
::std::fs::File::open(
|
||||||
|
&fontconfig::FontConfig::new()
|
||||||
|
.unwrap()
|
||||||
|
.get_regular_family_fonts("sans")
|
||||||
|
.unwrap()[0],
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
.read_to_end(&mut font_data)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
if !env
|
||||||
|
.get_shell()
|
||||||
|
.expect("Expected environment to contain a shell.")
|
||||||
|
.needs_configure()
|
||||||
|
{
|
||||||
|
if let Some(pool) = pools.pool() {
|
||||||
|
redraw(pool, window.surface(), dimensions, &font_data);
|
||||||
|
}
|
||||||
|
window.refresh();
|
||||||
|
}
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match next_action.take() {
|
||||||
|
Some(WEvent::Close) => break,
|
||||||
|
Some(WEvent::Refresh) => {
|
||||||
|
window.refresh();
|
||||||
|
window.surface().commit();
|
||||||
|
}
|
||||||
|
Some(WEvent::Configure { new_size, .. }) => {
|
||||||
|
if let Some((w, h)) = new_size {
|
||||||
|
window.resize(w, h);
|
||||||
|
dimensions = (w, h)
|
||||||
|
}
|
||||||
|
window.refresh();
|
||||||
|
if let Some(pool) = pools.pool() {
|
||||||
|
redraw(pool, window.surface(), dimensions, &font_data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
display.flush().unwrap();
|
||||||
|
event_queue
|
||||||
|
.dispatch(&mut next_action, |_, _, _| {})
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn redraw(
|
||||||
|
pool: &mut MemPool,
|
||||||
|
surface: &wl_surface::WlSurface,
|
||||||
|
dimensions: (u32, u32),
|
||||||
|
font_data: &[u8],
|
||||||
|
) {
|
||||||
|
let (buf_x, buf_y) = (dimensions.0 as usize, dimensions.1 as usize);
|
||||||
|
|
||||||
|
pool.resize(4 * buf_x * buf_y)
|
||||||
|
.expect("Failed to resize the memory pool.");
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = vec![255; 4 * buf_x * buf_y];
|
||||||
|
let mut canvas =
|
||||||
|
andrew::Canvas::new(&mut buf, buf_x, buf_y, 4 * buf_x, andrew::Endian::native());
|
||||||
|
|
||||||
|
println!("______________");
|
||||||
|
let mut total_dur = Duration::new(0, 0);
|
||||||
|
|
||||||
|
// Draw background
|
||||||
|
let (block_w, block_h) = (buf_x / 20, buf_y / 20);
|
||||||
|
for block_y in 0..21 {
|
||||||
|
for block_x in 0..21 {
|
||||||
|
let color = if (block_x + (block_y % 2)) % 2 == 0 {
|
||||||
|
[255, 0, 0, 0]
|
||||||
|
} else {
|
||||||
|
[255, 255, 255, 255]
|
||||||
|
};
|
||||||
|
|
||||||
|
let block = rectangle::Rectangle::new(
|
||||||
|
(block_w * block_x, block_h * block_y),
|
||||||
|
(block_w, block_h),
|
||||||
|
None,
|
||||||
|
Some(color),
|
||||||
|
);
|
||||||
|
let timer = Instant::now();
|
||||||
|
canvas.draw(&block);
|
||||||
|
total_dur += timer.elapsed()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!("Background draw time: {:?}", total_dur);
|
||||||
|
|
||||||
|
let rectangle = rectangle::Rectangle::new(
|
||||||
|
(buf_x / 30, buf_y / 4),
|
||||||
|
(buf_x - (buf_x / 30) * 2, buf_y - buf_y / 2),
|
||||||
|
Some((
|
||||||
|
15,
|
||||||
|
[255, 170, 20, 45],
|
||||||
|
rectangle::Sides::TOP ^ rectangle::Sides::BOTTOM,
|
||||||
|
Some(10),
|
||||||
|
)),
|
||||||
|
Some([255, 170, 20, 45]),
|
||||||
|
);
|
||||||
|
let mut timer = Instant::now();
|
||||||
|
canvas.draw(&rectangle);
|
||||||
|
println!("Rectangle draw time: {:?}", timer.elapsed());
|
||||||
|
total_dur += timer.elapsed();
|
||||||
|
|
||||||
|
let text_h = buf_x as f32 / 80.;
|
||||||
|
let text_hh = text_h / 2.;
|
||||||
|
let mut text = text::Text::new(
|
||||||
|
(63, 69),
|
||||||
|
[255, 255, 255, 255],
|
||||||
|
font_data,
|
||||||
|
text_h,
|
||||||
|
2.0,
|
||||||
|
"“Life is the art of drawing without an eraser.” - John W. Gardner",
|
||||||
|
);
|
||||||
|
text.pos = (
|
||||||
|
buf_x / 2 - text.get_width() / 2,
|
||||||
|
buf_y / 2 - text_hh as usize,
|
||||||
|
);
|
||||||
|
|
||||||
|
let text_box = rectangle::Rectangle::new(
|
||||||
|
(
|
||||||
|
buf_x / 2 - text.get_width() / 2 - 10,
|
||||||
|
buf_y / 2 - text_hh as usize - 10,
|
||||||
|
),
|
||||||
|
(text.get_width() + 20, text_h as usize + 20),
|
||||||
|
Some((3, [255, 255, 255, 255], rectangle::Sides::ALL, Some(5))),
|
||||||
|
None,
|
||||||
|
);
|
||||||
|
|
||||||
|
timer = Instant::now();
|
||||||
|
canvas.draw(&text_box);
|
||||||
|
println!("Text box draw time: {:?}", timer.elapsed());
|
||||||
|
total_dur += timer.elapsed();
|
||||||
|
|
||||||
|
timer = Instant::now();
|
||||||
|
canvas.draw(&text);
|
||||||
|
println!("Text draw time: {:?}", timer.elapsed());
|
||||||
|
total_dur += timer.elapsed();
|
||||||
|
|
||||||
|
println!("Total draw time: {:?}", total_dur);
|
||||||
|
|
||||||
|
pool.seek(SeekFrom::Start(0)).unwrap();
|
||||||
|
pool.write_all(canvas.buffer).unwrap();
|
||||||
|
pool.flush().unwrap();
|
||||||
|
|
||||||
|
let new_buffer = pool.buffer(
|
||||||
|
0,
|
||||||
|
buf_x as i32,
|
||||||
|
buf_y as i32,
|
||||||
|
4 * buf_x as i32,
|
||||||
|
wl_shm::Format::Argb8888,
|
||||||
|
);
|
||||||
|
surface.attach(Some(&new_buffer), 0, 0);
|
||||||
|
surface.commit();
|
||||||
|
}
|
|
@ -0,0 +1,133 @@
|
||||||
|
//! Andrew is a crate for drawing objects
|
||||||
|
#![warn(missing_docs)]
|
||||||
|
extern crate rusttype;
|
||||||
|
extern crate walkdir;
|
||||||
|
extern crate xdg;
|
||||||
|
extern crate xml;
|
||||||
|
|
||||||
|
#[macro_use]
|
||||||
|
extern crate bitflags;
|
||||||
|
|
||||||
|
/// A module that contains functions and objects relating to lines
|
||||||
|
pub mod line;
|
||||||
|
/// A module that contains functions and objects relating to shapes
|
||||||
|
pub mod shapes;
|
||||||
|
/// A module that contains functions and objects relating to text
|
||||||
|
pub mod text;
|
||||||
|
|
||||||
|
/// The Drawable trait allows object to be drawn to a buffer or canvas
|
||||||
|
pub trait Drawable {
|
||||||
|
/// A function that draws the object to a canvas
|
||||||
|
fn draw(&self, canvas: &mut Canvas);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Describes an endianness (aka byte order)
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub enum Endian {
|
||||||
|
/// Little Endian
|
||||||
|
Little,
|
||||||
|
/// Big Endian
|
||||||
|
Big,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Endian {
|
||||||
|
/// Returns the native endianness
|
||||||
|
pub fn native() -> Endian {
|
||||||
|
if cfg!(target_endian = "little") {
|
||||||
|
Endian::Little
|
||||||
|
} else {
|
||||||
|
Endian::Big
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The canvas object acts as a wrapper around a buffer, providing information and functions
|
||||||
|
/// for drawing
|
||||||
|
pub struct Canvas<'a> {
|
||||||
|
/// A buffer for the canvas to draw to
|
||||||
|
pub buffer: &'a mut [u8],
|
||||||
|
/// The width in pixels of the canvas
|
||||||
|
pub width: usize,
|
||||||
|
/// The height in pixels of the canvas
|
||||||
|
pub height: usize,
|
||||||
|
/// The number of bytes between each line of pixels on the canvas
|
||||||
|
pub stride: usize,
|
||||||
|
/// The number of bytes contained in each pixel
|
||||||
|
pub pixel_size: usize,
|
||||||
|
/// The endianness of the canvas
|
||||||
|
pub endianness: Endian,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Canvas<'a> {
|
||||||
|
/// Creates a new canvas object
|
||||||
|
pub fn new(
|
||||||
|
buffer: &'a mut [u8],
|
||||||
|
width: usize,
|
||||||
|
height: usize,
|
||||||
|
stride: usize,
|
||||||
|
endianness: Endian,
|
||||||
|
) -> Canvas<'a> {
|
||||||
|
assert!(
|
||||||
|
stride % width == 0,
|
||||||
|
"Incorrect Dimensions - Stride is not a multiple of width"
|
||||||
|
);
|
||||||
|
assert!(buffer.len() == stride * height);
|
||||||
|
let pixel_size = stride / width;
|
||||||
|
Canvas {
|
||||||
|
buffer,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
stride,
|
||||||
|
pixel_size,
|
||||||
|
endianness,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Draws an object that implements the Drawable trait to the buffer
|
||||||
|
pub fn draw<D: Drawable>(&mut self, drawable: &D) {
|
||||||
|
drawable.draw(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Draws a pixel at the x and y coordinate
|
||||||
|
pub fn draw_point(&mut self, x: usize, y: usize, color: [u8; 4]) {
|
||||||
|
let base = self.stride * y + self.pixel_size * x;
|
||||||
|
if self.endianness == Endian::Little {
|
||||||
|
if color[0] == 255 {
|
||||||
|
self.buffer[base + 3] = color[0];
|
||||||
|
self.buffer[base + 2] = color[1];
|
||||||
|
self.buffer[base + 1] = color[2];
|
||||||
|
self.buffer[base] = color[3];
|
||||||
|
} else {
|
||||||
|
for c in 0..3 {
|
||||||
|
let alpha = f32::from(color[0]) / 255.0;
|
||||||
|
let color_diff =
|
||||||
|
(color[3 - c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
|
||||||
|
let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
|
||||||
|
self.buffer[base + c] = new_color as u8;
|
||||||
|
}
|
||||||
|
self.buffer[base + 3] = 255 as u8;
|
||||||
|
}
|
||||||
|
} else if color[0] == 255 {
|
||||||
|
self.buffer[base] = color[0];
|
||||||
|
self.buffer[base + 1] = color[1];
|
||||||
|
self.buffer[base + 2] = color[2];
|
||||||
|
self.buffer[base + 3] = color[3];
|
||||||
|
} else {
|
||||||
|
for c in 1..4 {
|
||||||
|
let alpha = f32::from(color[0]) / 255.0;
|
||||||
|
let color_diff =
|
||||||
|
(color[c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
|
||||||
|
let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
|
||||||
|
self.buffer[base + c] = new_color as u8;
|
||||||
|
}
|
||||||
|
self.buffer[base] = 255 as u8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clears the entire canvas buffer by zeroing it
|
||||||
|
pub fn clear(&mut self) {
|
||||||
|
for i in 0..self.width * self.height * 4 {
|
||||||
|
self.buffer[i] = 0x00;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,167 @@
|
||||||
|
use std::cmp::{max, min};
|
||||||
|
|
||||||
|
use Canvas;
|
||||||
|
use Drawable;
|
||||||
|
use Endian;
|
||||||
|
|
||||||
|
/// A drawable object that represents a line
|
||||||
|
pub struct Line {
|
||||||
|
/// The first point of the line
|
||||||
|
pub pt1: (usize, usize),
|
||||||
|
/// The second point of the line
|
||||||
|
pub pt2: (usize, usize),
|
||||||
|
/// The color of the line
|
||||||
|
pub color: [u8; 4],
|
||||||
|
/// Decides whether the line will be antialiased
|
||||||
|
pub antialiased: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Line {
|
||||||
|
/// Creates a new Line object
|
||||||
|
pub fn new(
|
||||||
|
pt1: (usize, usize),
|
||||||
|
pt2: (usize, usize),
|
||||||
|
color: [u8; 4],
|
||||||
|
antialiased: bool,
|
||||||
|
) -> Line {
|
||||||
|
Line {
|
||||||
|
pt1,
|
||||||
|
pt2,
|
||||||
|
color,
|
||||||
|
antialiased,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drawable for Line {
|
||||||
|
fn draw(&self, canvas: &mut Canvas) {
|
||||||
|
if !self.antialiased {
|
||||||
|
if self.pt1.0 == self.pt2.0 && self.pt1.0 < canvas.width {
|
||||||
|
let min_y = min(self.pt1.1, self.pt2.1);
|
||||||
|
let max_y = min(max(self.pt1.1, self.pt2.1), canvas.height - 1);
|
||||||
|
for y in min_y..=max_y {
|
||||||
|
canvas.draw_point(self.pt1.0, y, self.color)
|
||||||
|
}
|
||||||
|
} else if self.pt1.1 == self.pt2.1 && self.pt1.1 < canvas.height {
|
||||||
|
let min_x = min(self.pt1.0, self.pt2.0);
|
||||||
|
let max_x = min(max(self.pt1.0, self.pt2.0), canvas.width - 1);
|
||||||
|
for x in min_x..=max_x {
|
||||||
|
canvas.draw_point(x, self.pt1.1, self.color)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Angled line without antialias
|
||||||
|
for (x, y) in bresenham(
|
||||||
|
self.pt1.0 as isize,
|
||||||
|
self.pt1.1 as isize,
|
||||||
|
self.pt2.0 as isize,
|
||||||
|
self.pt2.1 as isize,
|
||||||
|
) {
|
||||||
|
if x < canvas.width && y < canvas.height {
|
||||||
|
canvas.draw_point(x, y, self.color)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Angled line with antialias
|
||||||
|
for (x, y, coverage) in xiaolin_wu(
|
||||||
|
self.pt1.0 as f32,
|
||||||
|
self.pt1.1 as f32,
|
||||||
|
self.pt2.0 as f32,
|
||||||
|
self.pt2.1 as f32,
|
||||||
|
) {
|
||||||
|
if x < canvas.width && y < canvas.height {
|
||||||
|
let mut color = self.color;
|
||||||
|
let base = canvas.stride * y + canvas.pixel_size * x;
|
||||||
|
if coverage != 1.0 {
|
||||||
|
if canvas.endianness == Endian::Little {
|
||||||
|
color[1] = (canvas.buffer[base + 2] as f32 * (1.0 - coverage)
|
||||||
|
+ color[1] as f32 * coverage)
|
||||||
|
as u8;
|
||||||
|
color[2] = (canvas.buffer[base + 1] as f32 * (1.0 - coverage)
|
||||||
|
+ color[2] as f32 * coverage)
|
||||||
|
as u8;
|
||||||
|
color[3] = (canvas.buffer[base] as f32 * (1.0 - coverage)
|
||||||
|
+ color[3] as f32 * coverage)
|
||||||
|
as u8;
|
||||||
|
} else {
|
||||||
|
color[1] = (canvas.buffer[base + 1] as f32 * (1.0 - coverage)
|
||||||
|
+ color[1] as f32 * coverage)
|
||||||
|
as u8;
|
||||||
|
color[2] = (canvas.buffer[base + 2] as f32 * (1.0 - coverage)
|
||||||
|
+ color[2] as f32 * coverage)
|
||||||
|
as u8;
|
||||||
|
color[3] = (canvas.buffer[base + 3] as f32 * (1.0 - coverage)
|
||||||
|
+ color[3] as f32 * coverage)
|
||||||
|
as u8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
canvas.draw_point(x as usize, y as usize, color)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bresenham(mut x0: isize, mut y0: isize, x1: isize, y1: isize) -> Vec<(usize, usize)> {
|
||||||
|
let mut points: Vec<(usize, usize)> = Vec::new();
|
||||||
|
let dx = (x1 - x0).abs();
|
||||||
|
let sx = if x0 < x1 { 1 } else { -1 };
|
||||||
|
let dy = -((y1 - y0).abs());
|
||||||
|
let sy = if y0 < y1 { 1 } else { -1 };
|
||||||
|
let mut err = dx + dy;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
points.push((x0 as usize, y0 as usize));
|
||||||
|
if x0 == x1 && y0 == y1 {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
let e2 = 2 * err;
|
||||||
|
if e2 >= dy {
|
||||||
|
err += dy;
|
||||||
|
x0 += sx;
|
||||||
|
}
|
||||||
|
if e2 <= dx {
|
||||||
|
err += dx;
|
||||||
|
y0 += sy;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
points
|
||||||
|
}
|
||||||
|
|
||||||
|
fn xiaolin_wu(mut x0: f32, mut y0: f32, mut x1: f32, mut y1: f32) -> Vec<(usize, usize, f32)> {
|
||||||
|
let mut points: Vec<(usize, usize, f32)> = Vec::new();
|
||||||
|
let steep = (y1 - y0).abs() > (x1 - x0).abs();
|
||||||
|
if steep {
|
||||||
|
std::mem::swap(&mut x0, &mut y0);
|
||||||
|
std::mem::swap(&mut x1, &mut y1);
|
||||||
|
}
|
||||||
|
if x0 > x1 {
|
||||||
|
std::mem::swap(&mut x0, &mut x1);
|
||||||
|
std::mem::swap(&mut y0, &mut y1);
|
||||||
|
}
|
||||||
|
let dx = x1 - x0;
|
||||||
|
let dy = y1 - y0;
|
||||||
|
let gradient = if dx == 0.0 {
|
||||||
|
1.0
|
||||||
|
} else {
|
||||||
|
dy as f32 / dx as f32
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut intery = y0 + gradient;
|
||||||
|
points.push((x0 as usize, y0 as usize, 1.0));
|
||||||
|
points.push((x1 as usize, y1 as usize, 1.0));
|
||||||
|
if steep {
|
||||||
|
for x in x0 as usize + 1..=x1 as usize - 1 {
|
||||||
|
points.push((intery as usize, x, 1.0 - intery.fract()));
|
||||||
|
points.push((intery as usize + 1, x, intery.fract()));
|
||||||
|
intery = intery + gradient;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for x in x0 as usize + 1..=x1 as usize - 1 {
|
||||||
|
points.push((x, intery as usize, 1.0 - intery.fract()));
|
||||||
|
points.push((x, intery as usize + 1, intery.fract()));
|
||||||
|
intery = intery + gradient;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
points
|
||||||
|
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue