third_party: bump everything

windows
q3k 2021-01-07 23:38:16 +00:00
parent 08f6dc3ac5
commit 2938a31682
5120 changed files with 649737 additions and 499478 deletions

@@ -14,7 +14,7 @@ licenses([
# Aliased targets
alias(
name = "cgmath",
actual = "//third_party/cargo/vendor/cgmath-0.17.0:cgmath",
actual = "//third_party/cargo/vendor/cgmath-0.18.0:cgmath",
tags = [
"cargo-raze",
"manual",
@@ -41,7 +41,7 @@ alias(
alias(
name = "image",
actual = "//third_party/cargo/vendor/image-0.23.4:image",
actual = "//third_party/cargo/vendor/image-0.23.12:image",
tags = [
"cargo-raze",
"manual",
@@ -50,16 +50,7 @@ alias(
alias(
name = "log",
actual = "//third_party/cargo/vendor/log-0.4.8:log",
tags = [
"cargo-raze",
"manual",
],
)
alias(
name = "openvr",
actual = "//third_party/cargo/vendor/openvr-0.6.0:openvr",
actual = "//third_party/cargo/vendor/log-0.4.11:log",
tags = [
"cargo-raze",
"manual",
@@ -68,7 +59,7 @@ alias(
alias(
name = "vulkano",
actual = "//third_party/cargo/vendor/vulkano-0.18.0:vulkano",
actual = "//third_party/cargo/vendor/vulkano-0.20.0:vulkano",
tags = [
"cargo-raze",
"manual",
@@ -77,7 +68,7 @@ alias(
alias(
name = "vulkano_win",
actual = "//third_party/cargo/vendor/vulkano-win-0.18.0:vulkano_win",
actual = "//third_party/cargo/vendor/vulkano-win-0.20.0:vulkano_win",
tags = [
"cargo-raze",
"manual",
@@ -86,7 +77,7 @@ alias(
alias(
name = "winit",
actual = "//third_party/cargo/vendor/winit-0.22.2:winit",
actual = "//third_party/cargo/vendor/winit-0.24.0:winit",
tags = [
"cargo-raze",
"manual",

third_party/cargo/Cargo.lock (generated, vendored): 1048 changed lines

File diff suppressed because it is too large.

@@ -10,11 +10,11 @@ path = "fake_lib.rs"
env_logger = "0.6.1"
image = "0.23.1"
log = "0.4.6"
vulkano = "0.18.0"
vulkano-win = "0.18.0"
winit = "0.22.0"
cgmath = "0.17.0"
openvr = "0.6.0"
vulkano = "0.20.0"
vulkano-win = "0.20.0"
winit = "0.24.0"
cgmath = "0.18.0"
#openvr = "0.6.0"
flatbuffers = "0.6.1"
[workspace.metadata.raze]
@@ -43,11 +43,23 @@ additional_flags = [
"--cfg=atomic_cas"
]
[workspace.metadata.raze.crates.libloading.'0.6.2']
[workspace.metadata.raze.crates.libloading.'0.6.6']
additional_deps = ['//third_party/cargo/patches:libloading_global_static']
[workspace.metadata.raze.crates.cgmath.'0.17.0']
[workspace.metadata.raze.crates.cgmath.'0.18.0']
gen_buildrs = true
[workspace.metadata.raze.crates.image.'0.23.4']
[workspace.metadata.raze.crates.image.'0.23.12']
gen_buildrs = true
[workspace.metadata.raze.crates.num-rational.'0.3.2']
gen_buildrs = true
[workspace.metadata.raze.crates.libc.'0.2.82']
gen_buildrs = true
[workspace.metadata.raze.crates.wayland-client.'0.28.3']
gen_buildrs = true
[workspace.metadata.raze.crates.wayland-protocols.'0.28.3']
gen_buildrs = true

@@ -2,5 +2,5 @@ cc_library(
name = "libloading_global_static",
srcs = ["libloading/global_static.c"],
copts = ["-fPIC"],
visibility = ["//third_party/cargo/vendor/libloading-0.6.2:__pkg__"],
visibility = ["//third_party/cargo/vendor/libloading-0.6.6:__pkg__"],
)

@@ -0,0 +1 @@
{"files":{"CHANGELOG.md":"12997ba678cbef530fcd89e4e9b071bad7a63f2c21127a9895d12a71c4762f72","Cargo.toml":"74cdeb71983a9e460620aeba81a0ea6c98d6b656558d6b17bfff3e64c172a071","README.md":"59162e54bce67b4181f793866a73e4906b3cb4b45f3487f045aca2ce06611a80","src/geometry.rs":"8d970a944af7199ac6a42ace5d1ca661f7764d10a2af0eb09f7b356547f80cf8","src/lib.rs":"32f718b6be690d4d22fa60bf2d2f3b73f645e293a12f0e7c969c7ff2ac2f0a54","src/nostd_float.rs":"425e4f7a3c20213d561a376a09cb75a37ba3989b42e1700a3b15f642ccb99918","src/raster.rs":"49171451f8447d5200da96c5698cb4cd9e4d1556bb8d4cc76e998d48297d4f95","tests/issues.rs":"dff1f0f9992a49a71b3ac4e298033fe9687194a7948bdf29b110daa1ccc99790"},"package":"d9fe5e32de01730eb1f6b7f5b51c17e03e2325bf40a74f754f04f130043affff"}

@@ -0,0 +1,57 @@
"""
@generated
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
# buildifier: disable=load
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_binary",
"rust_library",
"rust_test",
)
# buildifier: disable=load
load("@bazel_skylib//lib:selects.bzl", "selects")
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # Apache-2.0 from expression "Apache-2.0"
])
# Generated Targets
rust_library(
name = "ab_glyph_rasterizer",
srcs = glob(["**/*.rs"]),
crate_features = [
"default",
"std",
],
crate_root = "src/lib.rs",
crate_type = "lib",
data = [],
edition = "2018",
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"manual",
],
version = "0.1.4",
# buildifier: leave-alone
deps = [
],
)
# Unsupported target "issues" with type "test" omitted

@@ -0,0 +1,16 @@
# 0.1.4
* Add `Rasterizer::reset`, `Rasterizer::clear` methods to allow allocation reuse.
# 0.1.3
* Fix index oob panic scenario.
# 0.1.2
* For `Point` implement `Sub`, `Add`, `SubAssign`, `AddAssign`, `PartialEq`, `PartialOrd`, `From<(x, y)>`,
`From<[x, y]>` for easier use downstream.
* Switch `Point` `Debug` implementation to output `point(1.2, 3.4)` smaller representation referring to the `point` fn.
# 0.1.1
* Add explicit compile error when building no_std without the "libm" feature.
# 0.1
* Implement zero dependency coverage rasterization for lines, quadratic & cubic beziers.

@@ -0,0 +1,31 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "ab_glyph_rasterizer"
version = "0.1.4"
authors = ["Alex Butler <alexheretic@gmail.com>"]
description = "Coverage rasterization for lines, quadratic & cubic beziers"
readme = "README.md"
keywords = ["text", "ttf", "otf", "font"]
license = "Apache-2.0"
repository = "https://github.com/alexheretic/ab-glyph"
[dependencies.libm]
version = "0.2.1"
optional = true
[dev-dependencies]
[features]
default = ["std"]
std = []

@@ -0,0 +1,51 @@
ab_glyph_rasterizer
[![crates.io](https://img.shields.io/crates/v/ab_glyph_rasterizer.svg)](https://crates.io/crates/ab_glyph_rasterizer)
[![Documentation](https://docs.rs/ab_glyph_rasterizer/badge.svg)](https://docs.rs/ab_glyph_rasterizer)
===================
Coverage rasterization for lines, quadratic & cubic beziers.
Useful for drawing .otf font glyphs.
Inspired by [font-rs](https://github.com/raphlinus/font-rs) &
[stb_truetype](https://github.com/nothings/stb/blob/master/stb_truetype.h).
## Example
```rust
let mut rasterizer = ab_glyph_rasterizer::Rasterizer::new(106, 183);
// draw a 300px 'ę' character
rasterizer.draw_cubic(point(103.0, 163.5), point(86.25, 169.25), point(77.0, 165.0), point(82.25, 151.5));
rasterizer.draw_cubic(point(82.25, 151.5), point(86.75, 139.75), point(94.0, 130.75), point(102.0, 122.0));
rasterizer.draw_line(point(102.0, 122.0), point(100.25, 111.25));
rasterizer.draw_cubic(point(100.25, 111.25), point(89.0, 112.75), point(72.75, 114.25), point(58.5, 114.25));
rasterizer.draw_cubic(point(58.5, 114.25), point(30.75, 114.25), point(18.5, 105.25), point(16.75, 72.25));
rasterizer.draw_line(point(16.75, 72.25), point(77.0, 72.25));
rasterizer.draw_cubic(point(77.0, 72.25), point(97.0, 72.25), point(105.25, 60.25), point(104.75, 38.5));
rasterizer.draw_cubic(point(104.75, 38.5), point(104.5, 13.5), point(89.0, 0.75), point(54.25, 0.75));
rasterizer.draw_cubic(point(54.25, 0.75), point(16.0, 0.75), point(0.0, 16.75), point(0.0, 64.0));
rasterizer.draw_cubic(point(0.0, 64.0), point(0.0, 110.5), point(16.0, 128.0), point(56.5, 128.0));
rasterizer.draw_cubic(point(56.5, 128.0), point(66.0, 128.0), point(79.5, 127.0), point(90.0, 125.0));
rasterizer.draw_cubic(point(90.0, 125.0), point(78.75, 135.25), point(73.25, 144.5), point(70.75, 152.0));
rasterizer.draw_cubic(point(70.75, 152.0), point(64.5, 169.0), point(75.5, 183.0), point(105.0, 170.5));
rasterizer.draw_line(point(105.0, 170.5), point(103.0, 163.5));
rasterizer.draw_cubic(point(55.0, 14.5), point(78.5, 14.5), point(88.5, 21.75), point(88.75, 38.75));
rasterizer.draw_cubic(point(88.75, 38.75), point(89.0, 50.75), point(85.75, 59.75), point(73.5, 59.75));
rasterizer.draw_line(point(73.5, 59.75), point(16.5, 59.75));
rasterizer.draw_cubic(point(16.5, 59.75), point(17.25, 25.5), point(27.0, 14.5), point(55.0, 14.5));
rasterizer.draw_line(point(55.0, 14.5), point(55.0, 14.5));
// iterate over the resultant pixel alphas, e.g. save pixel to a buffer
rasterizer.for_each_pixel(|index, alpha| {
// ...
});
```
Rendering the resultant pixel alphas as 8-bit grey produces:
![reference_otf_tailed_e](https://user-images.githubusercontent.com/2331607/78987793-ee95f480-7b26-11ea-91fb-e9f359d766f8.png)
## no_std
no_std environments are supported using `alloc` & [`libm`](https://github.com/rust-lang/libm).
```toml
ab_glyph_rasterizer = { default-features = false, features = ["libm"] }
```
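A minimal end-to-end sketch (not from the upstream README; it uses only APIs that appear in this diff: `Rasterizer`, `point`, `draw_line`, and `for_each_pixel_2d` from `src/raster.rs` below) showing how to collect coverage into an 8-bit grayscale buffer:
```rust
use ab_glyph_rasterizer::{point, Rasterizer};

fn main() {
    let (width, height) = (10, 10);
    let mut rasterizer = Rasterizer::new(width, height);

    // outline a small closed triangle
    rasterizer.draw_line(point(1.0, 1.0), point(9.0, 1.0));
    rasterizer.draw_line(point(9.0, 1.0), point(5.0, 9.0));
    rasterizer.draw_line(point(5.0, 9.0), point(1.0, 1.0));

    // collect the per-pixel coverage into an 8-bit buffer
    let mut pixels = vec![0u8; width * height];
    rasterizer.for_each_pixel_2d(|x, y, alpha| {
        pixels[y as usize * width + x as usize] = (alpha * 255.0).round() as u8;
    });
}
```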

@@ -0,0 +1,148 @@
#[cfg(all(feature = "libm", not(feature = "std")))]
use crate::nostd_float::FloatExt;
/// An (x, y) coordinate.
///
/// # Example
/// ```
/// use ab_glyph_rasterizer::{point, Point};
/// let p: Point = point(0.1, 23.2);
/// ```
#[derive(Clone, Copy, Default, PartialEq, PartialOrd)]
pub struct Point {
pub x: f32,
pub y: f32,
}
impl core::fmt::Debug for Point {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
write!(f, "point({:?}, {:?})", self.x, self.y)
}
}
impl Point {
#[inline]
pub(crate) fn distance_to(self, other: Point) -> f32 {
let d = other - self;
(d.x * d.x + d.y * d.y).sqrt()
}
}
/// [`Point`](struct.Point.html) constructor.
///
/// # Example
/// ```
/// # use ab_glyph_rasterizer::{point, Point};
/// let p = point(0.1, 23.2);
/// ```
#[inline]
pub fn point(x: f32, y: f32) -> Point {
Point { x, y }
}
/// Linear interpolation between points.
#[inline]
pub(crate) fn lerp(t: f32, p0: Point, p1: Point) -> Point {
point(p0.x + t * (p1.x - p0.x), p0.y + t * (p1.y - p0.y))
}
impl core::ops::Sub for Point {
type Output = Point;
/// Subtract rhs.x from x, rhs.y from y.
///
/// ```
/// # use ab_glyph_rasterizer::*;
/// let p1 = point(1.0, 2.0) - point(2.0, 1.5);
///
/// assert!((p1.x - -1.0).abs() <= core::f32::EPSILON);
/// assert!((p1.y - 0.5).abs() <= core::f32::EPSILON);
/// ```
#[inline]
fn sub(self, rhs: Point) -> Point {
point(self.x - rhs.x, self.y - rhs.y)
}
}
impl core::ops::Add for Point {
type Output = Point;
/// Add rhs.x to x, rhs.y to y.
///
/// ```
/// # use ab_glyph_rasterizer::*;
/// let p1 = point(1.0, 2.0) + point(2.0, 1.5);
///
/// assert!((p1.x - 3.0).abs() <= core::f32::EPSILON);
/// assert!((p1.y - 3.5).abs() <= core::f32::EPSILON);
/// ```
#[inline]
fn add(self, rhs: Point) -> Point {
point(self.x + rhs.x, self.y + rhs.y)
}
}
impl core::ops::AddAssign for Point {
/// ```
/// # use ab_glyph_rasterizer::*;
/// let mut p1 = point(1.0, 2.0);
/// p1 += point(2.0, 1.5);
///
/// assert!((p1.x - 3.0).abs() <= core::f32::EPSILON);
/// assert!((p1.y - 3.5).abs() <= core::f32::EPSILON);
/// ```
#[inline]
fn add_assign(&mut self, other: Self) {
self.x += other.x;
self.y += other.y;
}
}
impl core::ops::SubAssign for Point {
/// ```
/// # use ab_glyph_rasterizer::*;
/// let mut p1 = point(1.0, 2.0);
/// p1 -= point(2.0, 1.5);
///
/// assert!((p1.x - -1.0).abs() <= core::f32::EPSILON);
/// assert!((p1.y - 0.5).abs() <= core::f32::EPSILON);
/// ```
#[inline]
fn sub_assign(&mut self, other: Self) {
self.x -= other.x;
self.y -= other.y;
}
}
impl<F: Into<f32>> From<(F, F)> for Point {
/// ```
/// # use ab_glyph_rasterizer::*;
/// let p: Point = (23_f32, 34.5_f32).into();
/// let p2: Point = (5u8, 44u8).into();
/// ```
#[inline]
fn from((x, y): (F, F)) -> Self {
point(x.into(), y.into())
}
}
impl<F: Into<f32>> From<[F; 2]> for Point {
/// ```
/// # use ab_glyph_rasterizer::*;
/// let p: Point = [23_f32, 34.5].into();
/// let p2: Point = [5u8, 44].into();
/// ```
#[inline]
fn from([x, y]: [F; 2]) -> Self {
point(x.into(), y.into())
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn distance_to() {
let distance = point(0.0, 0.0).distance_to(point(3.0, 4.0));
assert!((distance - 5.0).abs() <= core::f32::EPSILON);
}
}

@@ -0,0 +1,36 @@
//! Coverage rasterization for lines, quadratic & cubic beziers.
//! Useful for drawing .otf font glyphs.
//!
//! ```
//! use ab_glyph_rasterizer::Rasterizer;
//! # let (width, height) = (1, 1);
//! let mut rasterizer = Rasterizer::new(width, height);
//!
//! // draw outlines
//! # let [l0, l1, q0, q1, q2, c0, c1, c2, c3] = [ab_glyph_rasterizer::point(0.0, 0.0); 9];
//! rasterizer.draw_line(l0, l1);
//! rasterizer.draw_quad(q0, q1, q2);
//! rasterizer.draw_cubic(c0, c1, c2, c3);
//!
//! // iterate over the resultant pixel alphas, e.g. save pixel to a buffer
//! rasterizer.for_each_pixel(|index, alpha| {
//! // ...
//! });
//! ```
#![cfg_attr(not(feature = "std"), no_std)]
#[cfg(not(feature = "std"))]
#[macro_use]
extern crate alloc;
#[cfg(all(feature = "libm", not(feature = "std")))]
mod nostd_float;
#[cfg(not(any(feature = "libm", feature = "std")))]
compile_error!("You need to activate either the `std` or `libm` feature.");
mod geometry;
mod raster;
pub use geometry::{point, Point};
pub use raster::Rasterizer;

@@ -0,0 +1,31 @@
/// Basic required float operations.
pub(crate) trait FloatExt {
fn floor(self) -> Self;
fn ceil(self) -> Self;
fn sqrt(self) -> Self;
fn round(self) -> Self;
fn abs(self) -> Self;
}
impl FloatExt for f32 {
#[inline]
fn floor(self) -> Self {
libm::floorf(self)
}
#[inline]
fn ceil(self) -> Self {
libm::ceilf(self)
}
#[inline]
fn sqrt(self) -> Self {
libm::sqrtf(self)
}
#[inline]
fn round(self) -> Self {
libm::roundf(self)
}
#[inline]
fn abs(self) -> Self {
libm::fabsf(self)
}
}

@@ -0,0 +1,286 @@
// Forked/repurposed from `font-rs` code: https://github.com/raphlinus/font-rs
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Modifications copyright (C) 2020 Alex Butler
//
// Cubic bezier drawing adapted from stb_truetype: https://github.com/nothings/stb
#[cfg(all(feature = "libm", not(feature = "std")))]
use crate::nostd_float::FloatExt;
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;
use crate::geometry::{lerp, Point};
/// Coverage rasterizer for lines, quadratic & cubic beziers.
pub struct Rasterizer {
width: usize,
height: usize,
a: Vec<f32>,
}
impl Rasterizer {
/// Allocates a new rasterizer that can draw onto a `width` x `height` alpha grid.
///
/// ```
/// use ab_glyph_rasterizer::Rasterizer;
/// let mut rasterizer = Rasterizer::new(14, 38);
/// ```
pub fn new(width: usize, height: usize) -> Self {
Self {
width,
height,
a: vec![0.0; width * height + 4],
}
}
/// Resets the rasterizer to an empty `width` x `height` alpha grid. This method behaves as if
/// the Rasterizer were re-created, with the advantage of not allocating if the total number of
/// pixels of the grid does not increase.
///
/// ```
/// # use ab_glyph_rasterizer::Rasterizer;
/// # let mut rasterizer = Rasterizer::new(14, 38);
/// rasterizer.reset(12, 24);
/// assert_eq!(rasterizer.dimensions(), (12, 24));
/// ```
pub fn reset(&mut self, width: usize, height: usize) {
self.width = width;
self.height = height;
self.a.truncate(0);
self.a.resize(width * height + 4, 0.0);
}
/// Clears the rasterizer. This method behaves as if the Rasterizer were re-created with the same
/// dimensions, but does not perform an allocation.
///
/// ```
/// # use ab_glyph_rasterizer::Rasterizer;
/// # let mut rasterizer = Rasterizer::new(14, 38);
/// rasterizer.clear();
/// ```
pub fn clear(&mut self) {
for px in &mut self.a {
*px = 0.0;
}
}
/// Returns the dimensions the rasterizer was built to draw to.
///
/// ```
/// # use ab_glyph_rasterizer::*;
/// let rasterizer = Rasterizer::new(9, 8);
/// assert_eq!((9, 8), rasterizer.dimensions());
/// ```
pub fn dimensions(&self) -> (usize, usize) {
(self.width, self.height)
}
/// Adds a straight line from `p0` to `p1` to the outline.
///
/// ```
/// # use ab_glyph_rasterizer::*;
/// # let mut rasterizer = Rasterizer::new(9, 8);
/// rasterizer.draw_line(point(0.0, 0.48), point(1.22, 0.48));
/// ```
pub fn draw_line(&mut self, p0: Point, p1: Point) {
if (p0.y - p1.y).abs() <= core::f32::EPSILON {
return;
}
let (dir, p0, p1) = if p0.y < p1.y {
(1.0, p0, p1)
} else {
(-1.0, p1, p0)
};
let dxdy = (p1.x - p0.x) / (p1.y - p0.y);
let mut x = p0.x;
let y0 = p0.y as usize; // note: implicit max of 0 because usize (TODO: really true?)
if p0.y < 0.0 {
x -= p0.y * dxdy;
}
for y in y0..self.height.min(p1.y.ceil() as usize) {
let linestart = y * self.width;
let dy = ((y + 1) as f32).min(p1.y) - (y as f32).max(p0.y);
let xnext = x + dxdy * dy;
let d = dy * dir;
let (x0, x1) = if x < xnext { (x, xnext) } else { (xnext, x) };
let x0floor = x0.floor();
let x0i = x0floor as i32;
let x1ceil = x1.ceil();
let x1i = x1ceil as i32;
if x1i <= x0i + 1 {
let xmf = 0.5 * (x + xnext) - x0floor;
let linestart_x0i = linestart as isize + x0i as isize;
if linestart_x0i < 0 {
continue; // oob index
}
self.a[linestart_x0i as usize] += d - d * xmf;
self.a[linestart_x0i as usize + 1] += d * xmf;
} else {
let s = (x1 - x0).recip();
let x0f = x0 - x0floor;
let a0 = 0.5 * s * (1.0 - x0f) * (1.0 - x0f);
let x1f = x1 - x1ceil + 1.0;
let am = 0.5 * s * x1f * x1f;
let linestart_x0i = linestart as isize + x0i as isize;
if linestart_x0i < 0 {
continue; // oob index
}
self.a[linestart_x0i as usize] += d * a0;
if x1i == x0i + 2 {
self.a[linestart_x0i as usize + 1] += d * (1.0 - a0 - am);
} else {
let a1 = s * (1.5 - x0f);
self.a[linestart_x0i as usize + 1] += d * (a1 - a0);
for xi in x0i + 2..x1i - 1 {
self.a[linestart + xi as usize] += d * s;
}
let a2 = a1 + (x1i - x0i - 3) as f32 * s;
self.a[linestart + (x1i - 1) as usize] += d * (1.0 - a2 - am);
}
self.a[linestart + x1i as usize] += d * am;
}
x = xnext;
}
}
/// Adds a quadratic Bézier curve from `p0` to `p2` to the outline using `p1` as the control.
///
/// ```
/// # use ab_glyph_rasterizer::*;
/// # let mut rasterizer = Rasterizer::new(14, 38);
/// rasterizer.draw_quad(point(6.2, 34.5), point(7.2, 34.5), point(9.2, 34.0));
/// ```
pub fn draw_quad(&mut self, p0: Point, p1: Point, p2: Point) {
let devx = p0.x - 2.0 * p1.x + p2.x;
let devy = p0.y - 2.0 * p1.y + p2.y;
let devsq = devx * devx + devy * devy;
if devsq < 0.333 {
self.draw_line(p0, p2);
return;
}
let tol = 3.0;
let n = 1 + (tol * devsq).sqrt().sqrt().floor() as usize;
let mut p = p0;
let nrecip = (n as f32).recip();
let mut t = 0.0;
for _i in 0..n - 1 {
t += nrecip;
let pn = lerp(t, lerp(t, p0, p1), lerp(t, p1, p2));
self.draw_line(p, pn);
p = pn;
}
self.draw_line(p, p2);
}
/// Adds a cubic Bézier curve from `p0` to `p3` to the outline using `p1` as the control
/// at the beginning of the curve and `p2` at the end of the curve.
///
/// ```
/// # use ab_glyph_rasterizer::*;
/// # let mut rasterizer = Rasterizer::new(12, 20);
/// rasterizer.draw_cubic(
/// point(10.3, 16.4),
/// point(8.6, 16.9),
/// point(7.7, 16.5),
/// point(8.2, 15.2),
/// );
/// ```
pub fn draw_cubic(&mut self, p0: Point, p1: Point, p2: Point, p3: Point) {
self.tesselate_cubic(p0, p1, p2, p3, 0);
}
// stb_truetype style cubic approximation by lines.
fn tesselate_cubic(&mut self, p0: Point, p1: Point, p2: Point, p3: Point, n: u8) {
// ...I'm not sure either ¯\_(ツ)_/¯
const OBJSPACE_FLATNESS: f32 = 0.35;
const OBJSPACE_FLATNESS_SQUARED: f32 = OBJSPACE_FLATNESS * OBJSPACE_FLATNESS;
const MAX_RECURSION_DEPTH: u8 = 16;
let longlen = p0.distance_to(p1) + p1.distance_to(p2) + p2.distance_to(p3);
let shortlen = p0.distance_to(p3);
let flatness_squared = longlen * longlen - shortlen * shortlen;
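// (Added note:) when the control polygon p0->p1->p2->p3 is barely longer than
// the chord p0->p3 the curve is nearly flat, so longlen^2 - shortlen^2 is a
// cheap stb_truetype-style flatness proxy tested against the threshold below.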
if n < MAX_RECURSION_DEPTH && flatness_squared > OBJSPACE_FLATNESS_SQUARED {
let p01 = lerp(0.5, p0, p1);
let p12 = lerp(0.5, p1, p2);
let p23 = lerp(0.5, p2, p3);
let pa = lerp(0.5, p01, p12);
let pb = lerp(0.5, p12, p23);
let mp = lerp(0.5, pa, pb);
self.tesselate_cubic(p0, p01, pa, mp, n + 1);
self.tesselate_cubic(mp, pb, p23, p3, n + 1);
} else {
self.draw_line(p0, p3);
}
}
/// Run a callback for each pixel index & alpha, with indices in `0..width * height`.
///
/// ```
/// # use ab_glyph_rasterizer::*;
/// # let (width, height) = (1, 1);
/// # let mut rasterizer = Rasterizer::new(width, height);
/// let mut pixels = vec![0u8; width * height];
/// rasterizer.for_each_pixel(|index, alpha| {
/// pixels[index] = (alpha * 255.0).round() as u8;
/// });
/// ```
pub fn for_each_pixel<O: FnMut(usize, f32)>(&self, mut px_fn: O) {
let mut acc = 0.0;
self.a[..self.width * self.height]
.iter()
.enumerate()
.for_each(|(idx, c)| {
acc += c;
px_fn(idx, acc.abs().min(1.0));
});
}
/// Run a callback for each pixel x position, y position & alpha.
///
/// Convenience wrapper for `for_each_pixel`.
///
/// ```
/// # use ab_glyph_rasterizer::*;
/// # let (width, height) = (1, 1);
/// # let mut rasterizer = Rasterizer::new(width, height);
/// # struct Img;
/// # impl Img { fn set_pixel(&self, x: u32, y: u32, a: u8) {} }
/// # let image = Img;
/// rasterizer.for_each_pixel_2d(|x, y, alpha| {
/// image.set_pixel(x, y, (alpha * 255.0).round() as u8);
/// });
/// ```
pub fn for_each_pixel_2d<O: FnMut(u32, u32, f32)>(&self, mut px_fn: O) {
let width32 = self.width as u32;
self.for_each_pixel(|idx, alpha| px_fn(idx as u32 % width32, idx as u32 / width32, alpha));
}
}
/// ```
/// let rasterizer = ab_glyph_rasterizer::Rasterizer::new(3, 4);
/// assert_eq!(&format!("{:?}", rasterizer), "Rasterizer { width: 3, height: 4 }");
/// ```
impl core::fmt::Debug for Rasterizer {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("Rasterizer")
.field("width", &self.width)
.field("height", &self.height)
.finish()
}
}

@@ -0,0 +1,10 @@
use ab_glyph_rasterizer::*;
/// Index oob panic rasterizing "Gauntl" using Bitter-Regular.otf
#[test]
fn rusttype_156_index_panic() {
let mut r = Rasterizer::new(6, 16);
r.draw_line(point(5.54, 14.299999), point(3.7399998, 13.799999));
r.draw_line(point(3.7399998, 13.799999), point(3.7399998, 0.0));
r.draw_line(point(3.7399998, 0.0), point(0.0, 0.10000038));
}

@@ -0,0 +1 @@
{"files":{"CHANGELOG.md":"042ed3158af7000c88a6617d775f11456bd30f6c7c8b5b586978faa1e11b1e24","Cargo.toml":"107d13689eecfa82a8b5ae35bf835b9d2775337226630e4bdb35f22d0dd52e18","LICENSE-0BSD":"861399f8c21c042b110517e76dc6b63a2b334276c8cf17412fc3c8908ca8dc17","LICENSE-APACHE":"8ada45cd9f843acf64e4722ae262c622a2b3b3007c7310ef36ac1061a30f6adb","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"fa83fd5ee10b61827de382e496bf66296a526e3d2c3b2aa5ad672aa15e8d2d7f","RELEASE_PROCESS.md":"a86cd10fc70f167f8d00e9e4ce0c6b4ebdfa1865058390dffd1e0ad4d3e68d9d","benches/bench.rs":"c07ce370e3680c602e415f8d1ec4e543ea2163ab22a09b6b82d93e8a30adca82","src/algo.rs":"b664b131f724a809591394a10b9023f40ab5963e32a83fa3163c2668e59c8b66","src/lib.rs":"67f3ca5b6333e22745b178b70f472514162cea2890344724f0f66995fcf19806"},"package":"ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"}

@@ -0,0 +1,55 @@
"""
@generated
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
# buildifier: disable=load
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_binary",
"rust_library",
"rust_test",
)
# buildifier: disable=load
load("@bazel_skylib//lib:selects.bzl", "selects")
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # MIT from expression "0BSD OR (MIT OR Apache-2.0)"
])
# Generated Targets
# Unsupported target "bench" with type "bench" omitted
rust_library(
name = "adler",
srcs = glob(["**/*.rs"]),
crate_features = [
],
crate_root = "src/lib.rs",
crate_type = "lib",
data = [],
edition = "2015",
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"manual",
],
version = "0.2.3",
# buildifier: leave-alone
deps = [
],
)

@@ -0,0 +1,33 @@
# Changelog
## Unreleased
No changes.
## [0.2.3 - 2020-07-11](https://github.com/jonas-schievink/adler/releases/tag/v0.2.3)
- Process 4 Bytes at a time, improving performance by up to 50% ([#2]).
## [0.2.2 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.2)
- Bump MSRV to 1.31.0.
## [0.2.1 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.1)
- Add a few `#[inline]` annotations to small functions.
- Fix CI badge.
- Allow integration into libstd.
## [0.2.0 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.0)
- Support `#![no_std]` when using `default-features = false`.
- Improve performance by around 7x.
- Support Rust 1.8.0.
- Improve API naming.
## [0.1.0 - 2020-06-26](https://github.com/jonas-schievink/adler/releases/tag/v0.1.0)
Initial release.
[#2]: https://github.com/jonas-schievink/adler/pull/2

@@ -0,0 +1,69 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "adler"
version = "0.2.3"
authors = ["Jonas Schievink <jonasschievink@gmail.com>"]
description = "A simple clean-room implementation of the Adler-32 checksum"
documentation = "https://docs.rs/adler/"
readme = "README.md"
keywords = ["checksum", "integrity", "hash", "adler32"]
categories = ["algorithms"]
license = "0BSD OR MIT OR Apache-2.0"
repository = "https://github.com/jonas-schievink/adler.git"
[package.metadata.docs.rs]
rustdoc-args = ["--cfg docsrs"]
[package.metadata.release]
no-dev-version = true
pre-release-commit-message = "Release {{version}}"
tag-message = "{{version}}"
[[package.metadata.release.pre-release-replacements]]
file = "CHANGELOG.md"
replace = "## Unreleased\n\nNo changes.\n\n## [{{version}} - {{date}}](https://github.com/jonas-schievink/adler/releases/tag/v{{version}})\n"
search = "## Unreleased\n"
[[package.metadata.release.pre-release-replacements]]
file = "README.md"
replace = "adler = \"{{version}}\""
search = "adler = \"[a-z0-9\\\\.-]+\""
[[package.metadata.release.pre-release-replacements]]
file = "src/lib.rs"
replace = "https://docs.rs/adler/{{version}}"
search = "https://docs.rs/adler/[a-z0-9\\.-]+"
[[bench]]
name = "bench"
harness = false
[dependencies.compiler_builtins]
version = "0.1.2"
optional = true
[dependencies.core]
version = "1.0.0"
optional = true
package = "rustc-std-workspace-core"
[dev-dependencies.criterion]
version = "0.3.2"
[features]
default = ["std"]
rustc-dep-of-std = ["core", "compiler_builtins"]
std = []
[badges.maintenance]
status = "actively-developed"
[badges.travis-ci]
repository = "jonas-schievink/adler"

@@ -0,0 +1,12 @@
Copyright (C) Jonas Schievink <jonasschievink@gmail.com>
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
https://www.apache.org/licenses/LICENSE-2.0
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@@ -0,0 +1,38 @@
# Adler-32 checksums for Rust
[![crates.io](https://img.shields.io/crates/v/adler.svg)](https://crates.io/crates/adler)
[![docs.rs](https://docs.rs/adler/badge.svg)](https://docs.rs/adler/)
![CI](https://github.com/jonas-schievink/adler/workflows/CI/badge.svg)
This crate provides a simple implementation of the Adler-32 checksum, used in
zlib, rsync, and other software.
Please refer to the [changelog](CHANGELOG.md) to see what changed in the last
releases.
## Features
- Permissively licensed (0BSD) clean-room implementation.
- Zero dependencies.
- Decent performance (3-4 GB/s).
- Supports `#![no_std]` (with `default-features = false`).
## Usage
Add an entry to your `Cargo.toml`:
```toml
[dependencies]
adler = "0.2.3"
```
Check the [API Documentation](https://docs.rs/adler/) for how to use the
crate's functionality.
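As a concrete starting point, a minimal sketch (not part of the upstream README) exercising the one-shot and incremental APIs from `src/lib.rs` shown later in this diff:
```rust
use adler::{adler32_slice, Adler32};

fn main() {
    // one-shot checksum of a byte slice
    assert_eq!(adler32_slice(b"Wikipedia"), 0x11E60398);

    // incremental checksumming over several writes gives the same result
    let mut hasher = Adler32::new();
    hasher.write_slice(b"Wiki");
    hasher.write_slice(b"pedia");
    assert_eq!(hasher.checksum(), 0x11E60398);
}
```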
## Rust version support
Currently, this crate supports all Rust versions starting at Rust 1.31.0.
Bumping the Minimum Supported Rust Version (MSRV) is *not* considered a breaking
change, but will not be done without good reasons. The latest 3 stable Rust
versions will always be supported no matter what.

@@ -0,0 +1,13 @@
# What to do to publish a new release
1. Ensure all notable changes are in the changelog under "Unreleased".
2. Execute `cargo release <level>` to bump version(s), tag and publish
everything. External subcommand, must be installed with `cargo install
cargo-release`.
`<level>` can be one of `major|minor|patch`. If this is the first release
(`0.1.0`), use `minor`, since the version starts out as `0.0.0`.
3. Go to the GitHub releases, edit the just-pushed tag. Copy the release notes
from the changelog.

@@ -0,0 +1,109 @@
extern crate adler;
extern crate criterion;
use adler::{adler32_slice, Adler32};
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
fn simple(c: &mut Criterion) {
{
const SIZE: usize = 100;
let mut group = c.benchmark_group("simple-100b");
group.throughput(Throughput::Bytes(SIZE as u64));
group.bench_function("zeroes-100", |bencher| {
bencher.iter(|| {
adler32_slice(&[0; SIZE]);
});
});
group.bench_function("ones-100", |bencher| {
bencher.iter(|| {
adler32_slice(&[0xff; SIZE]);
});
});
}
{
const SIZE: usize = 1024;
let mut group = c.benchmark_group("simple-1k");
group.throughput(Throughput::Bytes(SIZE as u64));
group.bench_function("zeroes-1k", |bencher| {
bencher.iter(|| {
adler32_slice(&[0; SIZE]);
});
});
group.bench_function("ones-1k", |bencher| {
bencher.iter(|| {
adler32_slice(&[0xff; SIZE]);
});
});
}
{
const SIZE: usize = 1024 * 1024;
let mut group = c.benchmark_group("simple-1m");
group.throughput(Throughput::Bytes(SIZE as u64));
group.bench_function("zeroes-1m", |bencher| {
bencher.iter(|| {
adler32_slice(&[0; SIZE]);
});
});
group.bench_function("ones-1m", |bencher| {
bencher.iter(|| {
adler32_slice(&[0xff; SIZE]);
});
});
}
}
fn chunked(c: &mut Criterion) {
const SIZE: usize = 16 * 1024 * 1024;
let data = vec![0xAB; SIZE];
let mut group = c.benchmark_group("chunked-16m");
group.throughput(Throughput::Bytes(SIZE as u64));
group.bench_function("5552", |bencher| {
bencher.iter(|| {
let mut h = Adler32::new();
for chunk in data.chunks(5552) {
h.write_slice(chunk);
}
h.checksum()
});
});
group.bench_function("8k", |bencher| {
bencher.iter(|| {
let mut h = Adler32::new();
for chunk in data.chunks(8 * 1024) {
h.write_slice(chunk);
}
h.checksum()
});
});
group.bench_function("64k", |bencher| {
bencher.iter(|| {
let mut h = Adler32::new();
for chunk in data.chunks(64 * 1024) {
h.write_slice(chunk);
}
h.checksum()
});
});
group.bench_function("1m", |bencher| {
bencher.iter(|| {
let mut h = Adler32::new();
for chunk in data.chunks(1024 * 1024) {
h.write_slice(chunk);
}
h.checksum()
});
});
}
criterion_group!(benches, simple, chunked);
criterion_main!(benches);

@@ -0,0 +1,146 @@
use crate::Adler32;
use std::ops::{AddAssign, MulAssign, RemAssign};
impl Adler32 {
pub(crate) fn compute(&mut self, bytes: &[u8]) {
// The basic algorithm is, for every byte:
// a = (a + byte) % MOD
// b = (b + a) % MOD
// where MOD = 65521.
//
// For efficiency, we can defer the `% MOD` operations as long as neither a nor b overflows:
// - Between calls to `write`, we ensure that a and b are always in range 0..MOD.
// - We use 32-bit arithmetic in this function.
// - Therefore, a and b must not increase by more than 2^32-MOD without performing a `% MOD`
// operation.
//
// According to Wikipedia, b is calculated as follows for non-incremental checksumming:
// b = n×D1 + (n-1)×D2 + (n-2)×D3 + ... + Dn + n*1 (mod 65521)
// Where n is the number of bytes and Di is the i-th Byte. We need to change this to account
// for the previous values of a and b, as well as treat every input Byte as being 255:
// b_inc = n×255 + (n-1)×255 + ... + 255 + n*65520
// Or in other words:
// b_inc = n*65520 + n(n+1)/2*255
// The max chunk size is thus the largest value of n so that b_inc <= 2^32-65521.
// 2^32-65521 = n*65520 + n(n+1)/2*255
// Plugging this into an equation solver since I can't math gives n = 5552.18..., so 5552.
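// (Added check: n = 5552 gives 5552*65520 + 5552*5553/2*255 = 4_294_624_680,
// which is <= 2^32-65521 = 4_294_901_775, while n = 5553 already gives
// 4_296_106_215 and would overflow the bound.)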
//
// On top of the optimization outlined above, the algorithm can also be parallelized with a
// bit more work:
//
// Note that b is a linear combination of a vector of input bytes (D1, ..., Dn).
//
// If we fix some value k<N and rewrite indices 1, ..., N as
//
// 1_1, 1_2, ..., 1_k, 2_1, ..., 2_k, ..., (N/k)_k,
//
// then we can express a and b in terms of sums of smaller sequences kb and ka:
//
// ka(j) := D1_j + D2_j + ... + D(N/k)_j where j <= k
// kb(j) := (N/k)*D1_j + (N/k-1)*D2_j + ... + D(N/k)_j where j <= k
//
// a = ka(1) + ka(2) + ... + ka(k) + 1
// b = k*(kb(1) + kb(2) + ... + kb(k)) - 1*ka(2) - ... - (k-1)*ka(k) + N
//
// We use this insight to unroll the main loop and process k=4 bytes at a time.
// The resulting code is highly amenable to SIMD acceleration, although the immediate speedups
// stem from increased pipeline parallelism rather than auto-vectorization.
//
// This technique is described in-depth (here:)[https://software.intel.com/content/www/us/\
// en/develop/articles/fast-computation-of-fletcher-checksums.html]
const MOD: u32 = 65521;
const CHUNK_SIZE: usize = 5552 * 4;
let mut a = u32::from(self.a);
let mut b = u32::from(self.b);
let mut a_vec = U32X4([0; 4]);
let mut b_vec = a_vec;
let (bytes, remainder) = bytes.split_at(bytes.len() - bytes.len() % 4);
// iterate over 4 bytes at a time
let chunk_iter = bytes.chunks_exact(CHUNK_SIZE);
let remainder_chunk = chunk_iter.remainder();
for chunk in chunk_iter {
for byte_vec in chunk.chunks_exact(4) {
let val = U32X4::from(byte_vec);
a_vec += val;
b_vec += a_vec;
}
b += CHUNK_SIZE as u32 * a;
a_vec %= MOD;
b_vec %= MOD;
b %= MOD;
}
// special-case the final chunk because it may be shorter than the rest
for byte_vec in remainder_chunk.chunks_exact(4) {
let val = U32X4::from(byte_vec);
a_vec += val;
b_vec += a_vec;
}
b += remainder_chunk.len() as u32 * a;
a_vec %= MOD;
b_vec %= MOD;
b %= MOD;
// combine the sub-sum results into the main sum
b_vec *= 4;
b_vec.0[1] += MOD - a_vec.0[1];
b_vec.0[2] += (MOD - a_vec.0[2]) * 2;
b_vec.0[3] += (MOD - a_vec.0[3]) * 3;
for &av in a_vec.0.iter() {
a += av;
}
for &bv in b_vec.0.iter() {
b += bv;
}
// iterate over the remaining few bytes in serial
for &byte in remainder.iter() {
a += u32::from(byte);
b += a;
}
self.a = (a % MOD) as u16;
self.b = (b % MOD) as u16;
}
}
#[derive(Copy, Clone)]
struct U32X4([u32; 4]);
impl U32X4 {
fn from(bytes: &[u8]) -> Self {
U32X4([
u32::from(bytes[0]),
u32::from(bytes[1]),
u32::from(bytes[2]),
u32::from(bytes[3]),
])
}
}
impl AddAssign<Self> for U32X4 {
fn add_assign(&mut self, other: Self) {
for (s, o) in self.0.iter_mut().zip(other.0.iter()) {
*s += o;
}
}
}
impl RemAssign<u32> for U32X4 {
fn rem_assign(&mut self, quotient: u32) {
for s in self.0.iter_mut() {
*s %= quotient;
}
}
}
impl MulAssign<u32> for U32X4 {
fn mul_assign(&mut self, rhs: u32) {
for s in self.0.iter_mut() {
*s *= rhs;
}
}
}

@@ -0,0 +1,215 @@
//! Adler-32 checksum implementation.
//!
//! This implementation features:
//!
//! - Permissively licensed (0BSD) clean-room implementation.
//! - Zero dependencies.
//! - Decent performance (3-4 GB/s).
//! - `#![no_std]` support (with `default-features = false`).
#![doc(html_root_url = "https://docs.rs/adler/0.2.3")]
// Deny a few warnings in doctests, since rustdoc `allow`s many warnings by default
#![doc(test(attr(deny(unused_imports, unused_must_use))))]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![warn(missing_debug_implementations)]
#![forbid(unsafe_code)]
#![cfg_attr(not(feature = "std"), no_std)]
#[cfg(not(feature = "std"))]
extern crate core as std;
mod algo;
use std::hash::Hasher;
#[cfg(feature = "std")]
use std::io::{self, BufRead};
/// Adler-32 checksum calculator.
///
/// An instance of this type is equivalent to an Adler-32 checksum: It can be created in the default
/// state via [`new`] (or the provided `Default` impl), or from a precalculated checksum via
/// [`from_checksum`], and the currently stored checksum can be fetched via [`checksum`].
///
/// This type also implements `Hasher`, which makes it easy to calculate Adler-32 checksums of any
/// type that implements or derives `Hash`. This also allows using Adler-32 in a `HashMap`, although
/// that is not recommended (while every checksum is a hash, they are not necessarily good at being
/// one).
///
/// [`new`]: #method.new
/// [`from_checksum`]: #method.from_checksum
/// [`checksum`]: #method.checksum
#[derive(Debug, Copy, Clone)]
pub struct Adler32 {
a: u16,
b: u16,
}
impl Adler32 {
/// Creates a new Adler-32 instance with default state.
#[inline]
pub fn new() -> Self {
Self::default()
}
/// Creates an `Adler32` instance from a precomputed Adler-32 checksum.
///
/// This allows resuming checksum calculation without having to keep the `Adler32` instance
/// around.
///
/// # Example
///
/// ```
/// # use adler::Adler32;
/// let parts = [
/// "rust",
/// "acean",
/// ];
/// let whole = adler::adler32_slice(b"rustacean");
///
/// let mut sum = Adler32::new();
/// sum.write_slice(parts[0].as_bytes());
/// let partial = sum.checksum();
///
/// // ...later
///
/// let mut sum = Adler32::from_checksum(partial);
/// sum.write_slice(parts[1].as_bytes());
/// assert_eq!(sum.checksum(), whole);
/// ```
#[inline]
pub fn from_checksum(sum: u32) -> Self {
Adler32 {
a: sum as u16,
b: (sum >> 16) as u16,
}
}
/// Returns the calculated checksum at this point in time.
#[inline]
pub fn checksum(&self) -> u32 {
(u32::from(self.b) << 16) | u32::from(self.a)
}
/// Adds `bytes` to the checksum calculation.
///
/// If efficiency matters, this should be called with Byte slices that contain at least a few
/// thousand Bytes.
pub fn write_slice(&mut self, bytes: &[u8]) {
self.compute(bytes);
}
}
impl Default for Adler32 {
#[inline]
fn default() -> Self {
Adler32 { a: 1, b: 0 }
}
}
impl Hasher for Adler32 {
#[inline]
fn finish(&self) -> u64 {
u64::from(self.checksum())
}
fn write(&mut self, bytes: &[u8]) {
self.write_slice(bytes);
}
}
/// Calculates the Adler-32 checksum of a byte slice.
pub fn adler32_slice(data: &[u8]) -> u32 {
let mut h = Adler32::new();
h.write_slice(data);
h.checksum()
}
/// Calculates the Adler-32 checksum of a `BufRead`'s contents.
///
/// The passed `BufRead` implementor will be read until it reaches EOF.
///
/// If you only have a `Read` implementor, wrap it in `std::io::BufReader`.
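///
/// # Example
///
/// ```
/// use std::io::BufReader;
/// // `&[u8]` implements `Read`; `BufReader` wraps it into a `BufRead`
/// let mut reader = BufReader::new(&b"Wikipedia"[..]);
/// assert_eq!(adler::adler32_reader(&mut reader).unwrap(), 0x11E60398);
/// ```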
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub fn adler32_reader<R: BufRead>(reader: &mut R) -> io::Result<u32> {
let mut h = Adler32::new();
loop {
let len = {
let buf = reader.fill_buf()?;
if buf.is_empty() {
return Ok(h.checksum());
}
h.write_slice(buf);
buf.len()
};
reader.consume(len);
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::BufReader;
#[test]
fn zeroes() {
assert_eq!(adler32_slice(&[]), 1);
assert_eq!(adler32_slice(&[0]), 1 | 1 << 16);
assert_eq!(adler32_slice(&[0, 0]), 1 | 2 << 16);
assert_eq!(adler32_slice(&[0; 100]), 0x00640001);
assert_eq!(adler32_slice(&[0; 1024]), 0x04000001);
assert_eq!(adler32_slice(&[0; 1024 * 1024]), 0x00f00001);
}
#[test]
fn ones() {
assert_eq!(adler32_slice(&[0xff; 1024]), 0x79a6fc2e);
assert_eq!(adler32_slice(&[0xff; 1024 * 1024]), 0x8e88ef11);
}
#[test]
fn mixed() {
assert_eq!(adler32_slice(&[1]), 2 | 2 << 16);
assert_eq!(adler32_slice(&[40]), 41 | 41 << 16);
assert_eq!(adler32_slice(&[0xA5; 1024 * 1024]), 0xd5009ab1);
}
/// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
#[test]
fn wiki() {
assert_eq!(adler32_slice(b"Wikipedia"), 0x11E60398);
}
#[test]
fn resume() {
let mut adler = Adler32::new();
adler.write_slice(&[0xff; 1024]);
let partial = adler.checksum();
assert_eq!(partial, 0x79a6fc2e); // from above
adler.write_slice(&[0xff; 1024 * 1024 - 1024]);
assert_eq!(adler.checksum(), 0x8e88ef11); // from above
// Make sure that we can resume computing from the partial checksum via `from_checksum`.
let mut adler = Adler32::from_checksum(partial);
adler.write_slice(&[0xff; 1024 * 1024 - 1024]);
assert_eq!(adler.checksum(), 0x8e88ef11); // from above
}
#[test]
fn bufread() {
fn test(data: &[u8], checksum: u32) {
// `BufReader` uses an 8 KB buffer, so this will test buffer refilling.
let mut buf = BufReader::new(data);
let real_sum = adler32_reader(&mut buf).unwrap();
assert_eq!(checksum, real_sum);
}
test(&[], 1);
test(&[0; 1024], 0x04000001);
test(&[0; 1024 * 1024], 0x00f00001);
test(&[0xA5; 1024 * 1024], 0xd5009ab1);
}
}

@@ -1 +0,0 @@
{"files":{"Cargo.toml":"3dfd0367a0af86dd57c4faf9f8a5b1ce8179c38e28d470d3c46ce2d2b45ef20f","LICENSE":"9efeecf73f68ed91830f71c69a53de1328d1f8c6968a68ca6e6b2d6f3a92a088","README.md":"77c9e2080e5ae700403343c27fe08bb616f1df92a8b42b0e7808a7b7d32eb7a2","appveyor.yml":"4873092bae0713890497e5ceae761af359d680e6cce5ce003bf38bc5c45cde44","src/lib.rs":"596ac0c2bbdfa759fb79eb7b7d9e18d6c51be0849f22204a85c4906fe2ae8bde"},"package":"5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"}

@@ -1,53 +0,0 @@
"""
@generated
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
# buildifier: disable=load
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_binary",
"rust_library",
"rust_test",
)
# buildifier: disable=load
load("@bazel_skylib//lib:selects.bzl", "selects")
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # Zlib from expression "Zlib"
])
# Generated Targets
rust_library(
name = "adler32",
srcs = glob(["**/*.rs"]),
crate_features = [
],
crate_root = "src/lib.rs",
crate_type = "lib",
data = [],
edition = "2015",
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"manual",
],
version = "1.0.4",
# buildifier: leave-alone
deps = [
],
)

@@ -1,24 +0,0 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "adler32"
version = "1.0.4"
authors = ["Remi Rampin <remirampin@gmail.com>"]
description = "Minimal Adler32 implementation for Rust."
documentation = "https://remram44.github.io/adler32-rs/index.html"
readme = "README.md"
keywords = ["adler32", "hash", "rolling"]
license = "Zlib"
repository = "https://github.com/remram44/adler32-rs"
[dev-dependencies.rand]
version = "0.4"

@@ -1,43 +0,0 @@
Copyright notice for the Rust port:
(C) 2016 Remi Rampin
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Copyright notice for the original C code from the zlib project:
(C) 1995-2017 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu

@@ -1,13 +0,0 @@
[![Build Status](https://travis-ci.org/remram44/adler32-rs.svg?branch=master)](https://travis-ci.org/remram44/adler32-rs/builds)
[![Win Build](https://ci.appveyor.com/api/projects/status/ekyg20rd6rwrus64/branch/master?svg=true)](https://ci.appveyor.com/project/remram44/adler32-rs)
[![Crates.io](https://img.shields.io/crates/v/adler32.svg)](https://crates.io/crates/adler32)
[![Say Thanks!](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/remram44)
What is this?
=============
It is an implementation of the [Adler32 rolling hash algorithm](https://en.wikipedia.org/wiki/Adler-32) in the [Rust programming language](https://www.rust-lang.org/).
It is adapted from Jean-Loup Gailly's and Mark Adler's [original implementation in zlib](https://github.com/madler/zlib/blob/2fa463bacfff79181df1a5270fb67cc679a53e71/adler32.c). A copy of the zlib copyright and license can be found in LICENSE-ZLIB.
[Generated documentation](https://remram44.github.io/adler32-rs/index.html)

View File

@ -1,12 +0,0 @@
install:
- ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-nightly-i686-pc-windows-gnu.exe'
- rust-nightly-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust"
- set PATH=%PATH%;C:\Program Files (x86)\Rust\bin
- rustc -V
- cargo -V
build: false
test_script:
- cargo build --verbose
- cargo test --verbose

View File

@ -1,307 +0,0 @@
//! A minimal implementation of Adler32 for Rust.
//!
//! This provides the simple method adler32(), that exhausts a Read and
//! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
//! build a hash byte-by-byte, allowing you to 'forget' past bytes in a rolling
//! fashion.
//!
//! The adler32 code has been translated (as accurately as I could manage) from
//! the zlib implementation.
#[cfg(test)]
extern crate rand;
use std::io;
// adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
// It was translated into Rust as accurately as I could manage
// The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
/* zlib.h -- interface of the 'zlib' general purpose compression library
version 1.2.8, April 28th, 2013
Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu
*/
// largest prime smaller than 65536
const BASE: u32 = 65521;
// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
const NMAX: usize = 5552;
#[inline(always)]
fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
*adler += u32::from(buf[0]);
*sum2 += *adler;
}
#[inline(always)]
fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
do1(adler, sum2, &buf[0..1]);
do1(adler, sum2, &buf[1..2]);
}
#[inline(always)]
fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
do2(adler, sum2, &buf[0..2]);
do2(adler, sum2, &buf[2..4]);
}
#[inline(always)]
fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
do4(adler, sum2, &buf[0..4]);
do4(adler, sum2, &buf[4..8]);
}
#[inline(always)]
fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
do8(adler, sum2, &buf[0..8]);
do8(adler, sum2, &buf[8..16]);
}
/// A rolling version of the Adler32 hash, which can 'forget' past bytes.
///
/// Calling remove() will update the hash to the value it would have if that
/// past byte had never been fed to the algorithm. This allows you to get the
/// hash of a rolling window very efficiently.
pub struct RollingAdler32 {
a: u32,
b: u32,
}
impl Default for RollingAdler32 {
fn default() -> RollingAdler32 {
RollingAdler32::new()
}
}
impl RollingAdler32 {
/// Creates an empty Adler32 context (with hash 1).
pub fn new() -> RollingAdler32 {
Self::from_value(1)
}
/// Creates an Adler32 context with the given initial value.
pub fn from_value(adler32: u32) -> RollingAdler32 {
let a = adler32 & 0xFFFF;
let b = adler32 >> 16;
RollingAdler32 { a, b }
}
/// Convenience function initializing a context from the hash of a buffer.
pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
let mut hash = RollingAdler32::new();
hash.update_buffer(buffer);
hash
}
/// Returns the current hash.
pub fn hash(&self) -> u32 {
(self.b << 16) | self.a
}
/// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
pub fn remove(&mut self, size: usize, byte: u8) {
let byte = u32::from(byte);
self.a = (self.a + BASE - byte) % BASE;
self.b = ((self.b + BASE - 1)
.wrapping_add(BASE.wrapping_sub(size as u32)
.wrapping_mul(byte))) % BASE;
}
/// Feeds a new `byte` to the algorithm to update the hash.
pub fn update(&mut self, byte: u8) {
let byte = u32::from(byte);
self.a = (self.a + byte) % BASE;
self.b = (self.b + self.a) % BASE;
}
/// Feeds a vector of bytes to the algorithm to update the hash.
pub fn update_buffer(&mut self, buffer: &[u8]) {
let len = buffer.len();
// in case user likes doing a byte at a time, keep it fast
if len == 1 {
self.update(buffer[0]);
return;
}
// in case short lengths are provided, keep it somewhat fast
if len < 16 {
for byte in buffer.iter().take(len) {
self.a += u32::from(*byte);
self.b += self.a;
}
if self.a >= BASE {
self.a -= BASE;
}
self.b %= BASE;
return;
}
let mut pos = 0;
// do length NMAX blocks -- requires just one modulo operation;
while pos + NMAX <= len {
let end = pos + NMAX;
while pos < end {
// 16 sums unrolled
do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
pos += 16;
}
self.a %= BASE;
self.b %= BASE;
}
// do remaining bytes (less than NMAX, still just one modulo)
if pos < len { // avoid modulos if none remaining
while len - pos >= 16 {
do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
pos += 16;
}
while len - pos > 0 {
self.a += u32::from(buffer[pos]);
self.b += self.a;
pos += 1;
}
self.a %= BASE;
self.b %= BASE;
}
}
}
/// Consumes a Read object and returns the Adler32 hash.
pub fn adler32<R: io::Read>(mut reader: R) -> io::Result<u32> {
let mut hash = RollingAdler32::new();
let mut buffer = [0u8; NMAX];
let mut read = try!(reader.read(&mut buffer));
while read > 0 {
hash.update_buffer(&buffer[..read]);
read = try!(reader.read(&mut buffer));
}
Ok(hash.hash())
}
#[cfg(test)]
mod test {
use rand;
use rand::Rng;
use std::io;
use super::{BASE, adler32, RollingAdler32};
fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
let mut a: u32 = 1;
let mut b: u32 = 0;
for byte in reader.bytes() {
let byte = try!(byte) as u32;
a = (a + byte) % BASE;
b = (b + a) % BASE;
}
Ok((b << 16) | a)
}
#[test]
fn testvectors() {
fn do_test(v: u32, bytes: &[u8]) {
let mut hash = RollingAdler32::new();
hash.update_buffer(&bytes);
assert_eq!(hash.hash(), v);
let r = io::Cursor::new(bytes);
assert_eq!(adler32(r).unwrap(), v);
}
do_test(0x00000001, b"");
do_test(0x00620062, b"a");
do_test(0x024d0127, b"abc");
do_test(0x29750586, b"message digest");
do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
do_test(0x8adb150c, b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
abcdefghijklmnopqrstuvwxyz\
0123456789");
do_test(0x97b61069, b"1234567890123456789012345678901234567890\
1234567890123456789012345678901234567890");
do_test(0xD6251498, &[255; 64000]);
}
#[test]
fn compare() {
let mut rng = rand::thread_rng();
let mut data = vec![0u8; 5589];
for size in [0, 1, 3, 4, 5, 31, 32, 33, 67,
5550, 5552, 5553, 5568, 5584, 5589].iter().cloned() {
rng.fill_bytes(&mut data[..size]);
let r1 = io::Cursor::new(&data[..size]);
let r2 = r1.clone();
if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
panic!("Comparison failed, size={}", size);
}
}
}
#[test]
fn rolling() {
assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
fn do_test(a: &[u8], b: &[u8]) {
let mut total = Vec::with_capacity(a.len() + b.len());
total.extend(a);
total.extend(b);
let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
for i in 0..(a.len()) {
h.remove(b.len(), a[i]);
h.update(total[b.len() + i]);
}
assert_eq!(h.hash(), adler32(b).unwrap());
}
do_test(b"a", b"b");
do_test(b"", b"this a test");
do_test(b"th", b"is a test");
do_test(b"this a ", b"test");
}
#[test]
fn long_window_remove() {
let mut hash = RollingAdler32::new();
let w = 65536;
assert!(w as u32 > BASE);
let mut bytes = vec![0; w*3];
for (i, b) in bytes.iter_mut().enumerate() {
*b = i as u8;
}
for (i, b) in bytes.iter().enumerate() {
if i >= w {
hash.remove(w, bytes[i - w]);
}
hash.update(*b);
if i > 0 && i % w == 0 {
assert_eq!(hash.hash(), 0x433a8772);
}
}
assert_eq!(hash.hash(), 0xbbba8772);
}
}

View File

@ -0,0 +1 @@
{"files":{"Cargo.toml":"9e33701960053fa4c3fe2e5700bdd1fc17c6a7ff3d1cd617cb4f23cc01123a0c","LICENSE":"f5af8beef8f5f88f1b78494703bbfa019c4f3630ac111344390d6f9975ab22ed","README.md":"022d9b80f7ecec822a9f005f311d990f94a061970e7b982c85978675ff48de17","src/bench.rs":"bf3353d119660f44e4c2ef06d34c74e9585984cd7a82df609d51250476bdf2d0","src/lib.rs":"4f203fd48b12052f950213249a55db0b1c6cde93fe3bdf26b70a6eb42c6c9dee"},"package":"aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"}

View File

@ -0,0 +1,57 @@
"""
@generated
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
# buildifier: disable=load
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_binary",
"rust_library",
"rust_test",
)
# buildifier: disable=load
load("@bazel_skylib//lib:selects.bzl", "selects")
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # Zlib from expression "Zlib"
])
# Generated Targets
# Unsupported target "bench" with type "bench" omitted
rust_library(
name = "adler32",
srcs = glob(["**/*.rs"]),
crate_features = [
"default",
"std",
],
crate_root = "src/lib.rs",
crate_type = "lib",
data = [],
edition = "2018",
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"manual",
],
version = "1.2.0",
# buildifier: leave-alone
deps = [
],
)

View File

@ -0,0 +1,61 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "adler32"
version = "1.2.0"
authors = ["Remi Rampin <remirampin@gmail.com>"]
description = "Minimal Adler32 implementation for Rust."
documentation = "https://docs.rs/adler32/"
readme = "README.md"
keywords = ["adler32", "hash", "rolling"]
license = "Zlib"
repository = "https://github.com/remram44/adler32-rs"
[lib]
bench = false
[[bench]]
name = "bench"
path = "src/bench.rs"
harness = false
[dependencies.compiler_builtins]
version = "0.1.2"
optional = true
[dependencies.core]
version = "1.0.0"
optional = true
package = "rustc-std-workspace-core"
[dev-dependencies.criterion]
version = "0.3"
[dev-dependencies.getrandom]
version = "0.1"
features = ["wasm-bindgen"]
[dev-dependencies.humansize]
version = "1.1"
[dev-dependencies.rand]
version = "0.7"
[features]
default = ["std"]
rustc-dep-of-std = ["core", "compiler_builtins"]
std = []
[target."cfg(target_arch = \"wasm32\")".dev-dependencies.wasm-bindgen]
version = "0.2.63"
[target."cfg(target_arch = \"wasm32\")".dev-dependencies.wasm-bindgen-test]
version = "0.3"

View File

@ -0,0 +1,43 @@
Copyright notice for the Rust port:
(C) 2016 Remi Rampin and adler32-rs contributors
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Copyright notice for the original C code from the zlib project:
(C) 1995-2017 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu

View File

@ -0,0 +1,17 @@
[![Build Status](https://github.com/remram44/adler32-rs/workflows/Test/badge.svg)](https://github.com/remram44/adler32-rs/actions)
[![Win Build](https://ci.appveyor.com/api/projects/status/ekyg20rd6rwrus64/branch/master?svg=true)](https://ci.appveyor.com/project/remram44/adler32-rs)
[![Crates.io](https://img.shields.io/crates/v/adler32.svg)](https://crates.io/crates/adler32)
[![Documentation](https://docs.rs/adler32/badge.svg)](https://docs.rs/adler32)
[![License](https://img.shields.io/crates/l/adler32.svg)](https://github.com/remram44/adler32-rs/blob/master/LICENSE)
What is this?
=============
It is an implementation of the [Adler32 rolling hash algorithm](https://en.wikipedia.org/wiki/Adler-32) in the [Rust programming language](https://www.rust-lang.org/).
It is adapted from Jean-Loup Gailly's and Mark Adler's [original implementation in zlib](https://github.com/madler/zlib/blob/2fa463bacfff79181df1a5270fb67cc679a53e71/adler32.c).
#### Minimum Supported Version of Rust (MSRV)
`adler32-rs` can be built with Rust version 1.33 or later. This version may be raised in the future but that will be accompanied by a minor version increase.
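#### Example

A minimal usage sketch based on the public API (`from_buffer`, `update`, `remove`, `hash`); the fixed-size window below is illustrative, not something the crate prescribes:

```rust
use adler32::RollingAdler32;

fn main() {
    let data = b"hello world";
    let window = 5;

    // One-shot hash of a whole buffer.
    let whole = RollingAdler32::from_buffer(data).hash();
    println!("adler32(data) = {:#010x}", whole);

    // Rolling hash of a fixed-size window, one byte at a time.
    let mut rolling = RollingAdler32::from_buffer(&data[..window]);
    for i in window..data.len() {
        rolling.remove(window, data[i - window]); // forget the byte leaving the window
        rolling.update(data[i]);                  // feed the byte entering it
    }

    // The rolled hash matches a fresh hash of the final window.
    assert_eq!(
        rolling.hash(),
        RollingAdler32::from_buffer(&data[data.len() - window..]).hash()
    );
}
```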

View File

@ -0,0 +1,30 @@
use adler32::RollingAdler32;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use humansize::{file_size_opts, FileSize};
use rand::Rng;
fn bench_update_buffer(c: &mut Criterion) {
let mut rng = rand::thread_rng();
let mut group = c.benchmark_group("update_buffer");
for &size in [512, 100 * 1024].iter() {
let mut adler = RollingAdler32::new();
let formatted_size = size.file_size(file_size_opts::BINARY).unwrap();
let in_bytes = {
let mut in_bytes = vec![0u8; size];
rng.fill(&mut in_bytes[..]);
in_bytes
};
group.throughput(Throughput::Bytes(size as u64));
group.bench_with_input(
BenchmarkId::from_parameter(formatted_size),
&in_bytes,
|b, data| {
b.iter(|| adler.update_buffer(data));
},
);
}
}
criterion_group!(bench_default, bench_update_buffer);
criterion_main!(bench_default);

View File

@ -0,0 +1,324 @@
//! A minimal implementation of Adler32 for Rust.
//!
//! This provides the simple method adler32(), that exhausts a Read and
//! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
//! build a hash byte-by-byte, allowing you to 'forget' past bytes in a rolling
//! fashion.
//!
//! The adler32 code has been translated (as accurately as I could manage) from
//! the zlib implementation.
#![forbid(unsafe_code)]
#![cfg_attr(not(feature = "std"), no_std)]
// adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
// It was translated into Rust as accurately as I could manage
// The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
/* zlib.h -- interface of the 'zlib' general purpose compression library
version 1.2.8, April 28th, 2013
Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu
*/
// largest prime smaller than 65536
const BASE: u32 = 65521;
// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
const NMAX: usize = 5552;
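// Sanity check of the bound: for n = 5552,
//   255*5552*5553/2 + 5553*(BASE-1) = 3_930_857_640 + 363_832_560
//                                   = 4_294_690_200 <= 4_294_967_295 (2^32 - 1),
// while n = 5553 gives 4_296_171_735, which would overflow a u32; so sums over
// blocks of up to NMAX bytes stay in range between the modulo reductions below.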
#[inline(always)]
fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
*adler += u32::from(buf[0]);
*sum2 += *adler;
}
#[inline(always)]
fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
do1(adler, sum2, &buf[0..1]);
do1(adler, sum2, &buf[1..2]);
}
#[inline(always)]
fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
do2(adler, sum2, &buf[0..2]);
do2(adler, sum2, &buf[2..4]);
}
#[inline(always)]
fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
do4(adler, sum2, &buf[0..4]);
do4(adler, sum2, &buf[4..8]);
}
#[inline(always)]
fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
do8(adler, sum2, &buf[0..8]);
do8(adler, sum2, &buf[8..16]);
}
/// A rolling version of the Adler32 hash, which can 'forget' past bytes.
///
/// Calling remove() will update the hash to the value it would have if that
/// past byte had never been fed to the algorithm. This allows you to get the
/// hash of a rolling window very efficiently.
#[derive(Clone)]
pub struct RollingAdler32 {
a: u32,
b: u32,
}
impl Default for RollingAdler32 {
fn default() -> RollingAdler32 {
RollingAdler32::new()
}
}
impl RollingAdler32 {
/// Creates an empty Adler32 context (with hash 1).
pub fn new() -> RollingAdler32 {
Self::from_value(1)
}
/// Creates an Adler32 context with the given initial value.
pub fn from_value(adler32: u32) -> RollingAdler32 {
let a = adler32 & 0xFFFF;
let b = adler32 >> 16;
RollingAdler32 { a, b }
}
/// Convenience function initializing a context from the hash of a buffer.
pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
let mut hash = RollingAdler32::new();
hash.update_buffer(buffer);
hash
}
/// Returns the current hash.
pub fn hash(&self) -> u32 {
(self.b << 16) | self.a
}
/// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
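///
/// For a window of `size` bytes, the departing byte contributed `byte` to
/// `a` and `size * byte` to `b` (its value is accumulated into `b` by each
/// of the `size` updates since it was fed), and one fewer update also means
/// the initial `a = 1` flows into `b` one less time; so this subtracts
/// `byte` from `a` and `size * byte + 1` from `b`, all modulo `BASE`.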
pub fn remove(&mut self, size: usize, byte: u8) {
let byte = u32::from(byte);
self.a = (self.a + BASE - byte) % BASE;
self.b = ((self.b + BASE - 1)
.wrapping_add(BASE.wrapping_sub(size as u32).wrapping_mul(byte)))
% BASE;
}
/// Feeds a new `byte` to the algorithm to update the hash.
pub fn update(&mut self, byte: u8) {
let byte = u32::from(byte);
self.a = (self.a + byte) % BASE;
self.b = (self.b + self.a) % BASE;
}
/// Feeds a vector of bytes to the algorithm to update the hash.
pub fn update_buffer(&mut self, buffer: &[u8]) {
let len = buffer.len();
// in case user likes doing a byte at a time, keep it fast
if len == 1 {
self.update(buffer[0]);
return;
}
// in case short lengths are provided, keep it somewhat fast
if len < 16 {
for byte in buffer.iter().take(len) {
self.a += u32::from(*byte);
self.b += self.a;
}
if self.a >= BASE {
self.a -= BASE;
}
self.b %= BASE;
return;
}
let mut pos = 0;
// do length NMAX blocks -- requires just one modulo operation;
while pos + NMAX <= len {
let end = pos + NMAX;
while pos < end {
// 16 sums unrolled
do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
pos += 16;
}
self.a %= BASE;
self.b %= BASE;
}
// do remaining bytes (less than NMAX, still just one modulo)
if pos < len {
// avoid modulos if none remaining
while len - pos >= 16 {
do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
pos += 16;
}
while len - pos > 0 {
self.a += u32::from(buffer[pos]);
self.b += self.a;
pos += 1;
}
self.a %= BASE;
self.b %= BASE;
}
}
}
/// Consumes a `Read` object and returns the Adler32 hash.
#[cfg(feature = "std")]
pub fn adler32<R: std::io::Read>(mut reader: R) -> std::io::Result<u32> {
let mut hash = RollingAdler32::new();
let mut buffer = [0u8; NMAX];
let mut read = reader.read(&mut buffer)?;
while read > 0 {
hash.update_buffer(&buffer[..read]);
read = reader.read(&mut buffer)?;
}
Ok(hash.hash())
}
#[cfg(test)]
mod test {
use rand::Rng;
use std::io;
#[cfg(target_arch = "wasm32")]
use wasm_bindgen_test::wasm_bindgen_test;
use super::{adler32, RollingAdler32, BASE};
fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
let mut a: u32 = 1;
let mut b: u32 = 0;
for byte in reader.bytes() {
let byte = byte? as u32;
a = (a + byte) % BASE;
b = (b + a) % BASE;
}
Ok((b << 16) | a)
}
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn testvectors() {
fn do_test(v: u32, bytes: &[u8]) {
let mut hash = RollingAdler32::new();
hash.update_buffer(&bytes);
assert_eq!(hash.hash(), v);
let r = io::Cursor::new(bytes);
assert_eq!(adler32(r).unwrap(), v);
}
do_test(0x00000001, b"");
do_test(0x00620062, b"a");
do_test(0x024d0127, b"abc");
do_test(0x29750586, b"message digest");
do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
do_test(
0x8adb150c,
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
abcdefghijklmnopqrstuvwxyz\
0123456789",
);
do_test(
0x97b61069,
b"1234567890123456789012345678901234567890\
1234567890123456789012345678901234567890",
);
do_test(0xD6251498, &[255; 64000]);
}
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn compare() {
let mut rng = rand::thread_rng();
let mut data = vec![0u8; 5589];
for size in [
0, 1, 3, 4, 5, 31, 32, 33, 67, 5550, 5552, 5553, 5568, 5584, 5589,
]
.iter()
.cloned()
{
rng.fill(&mut data[..size]);
let r1 = io::Cursor::new(&data[..size]);
let r2 = r1.clone();
if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
panic!("Comparison failed, size={}", size);
}
}
}
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn rolling() {
assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
fn do_test(a: &[u8], b: &[u8]) {
let mut total = Vec::with_capacity(a.len() + b.len());
total.extend(a);
total.extend(b);
let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
for i in 0..(a.len()) {
h.remove(b.len(), a[i]);
h.update(total[b.len() + i]);
}
assert_eq!(h.hash(), adler32(b).unwrap());
}
do_test(b"a", b"b");
do_test(b"", b"this a test");
do_test(b"th", b"is a test");
do_test(b"this a ", b"test");
}
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn long_window_remove() {
let mut hash = RollingAdler32::new();
let w = 65536;
assert!(w as u32 > BASE);
let mut bytes = vec![0; w * 3];
for (i, b) in bytes.iter_mut().enumerate() {
*b = i as u8;
}
for (i, b) in bytes.iter().enumerate() {
if i >= w {
hash.remove(w, bytes[i - w]);
}
hash.update(*b);
if i > 0 && i % w == 0 {
assert_eq!(hash.hash(), 0x433a8772);
}
}
assert_eq!(hash.hash(), 0xbbba8772);
}
}

View File

@ -1 +0,0 @@
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"bf3140f591d5e16e2c178bfdc39bc2ea9fecf3b50963ff60343d3e5a68d024cc","DESIGN.md":"9065f33d818d1562244d36dc4781e2a351108030cee17f11c2ba512ca7b4c27e","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"f679a3a8fa99694e00e2ed8ec9cd6f7dc28eee36f47c472411744aabb0556d0b","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"46c57a83a75a8f25fdf19a15deae10748d12b8af9445ae74700a546a92024608","src/automaton.rs":"ea3fc2648e026eac9f9969b0d457e49af7b4a40044379ce010d054f22afbc98f","src/buffer.rs":"0641828d1058b9c1c16e8e2445ce05c94b0ad2d97736a7e3cd8b913fa8edd7fd","src/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/classes.rs":"166c9f15c9a2e370e2bc9a9e1620bb2db13df52edfde9a0db1f20144519a7e72","src/dfa.rs":"e34f485a7c3257d2edee16fcdb2a5586aa0d2aa9e34d624288eb2b5a0f7cc65b","src/error.rs":"36dbf2cefbfaa8a69186551320dbff023d3e82780a6c925e87c3e3997b967e66","src/lib.rs":"a2a65d72cbe1eed1964c3fb080e5fa54245ab208a3c855531c1036f05e073452","src/nfa.rs":"6e8fe7633033c378e5487604a2772af3fc2eca011fe374fe0b6d6cee98198f58","src/packed/api.rs":"6c65dfa177b7d7b79f90a048f260bec7f817126c693b85f49704c7d2ecf5f646","src/packed/mod.rs":"29c76ad3cbb1f831140cefac7a27fb504ac4af4f454975a571965b48aad417eb","src/packed/pattern.rs":"b88c57af057997da0a5a06f4c5604a7e598c20acfc11c15cd8977727f6e1cf9c","src/packed/rabinkarp.rs":"b3242a8631ea5607163dcbb641e4ac9c6da26774378da1e51651b0ab5656b390","src/packed/teddy/README.md":"5819f40d221af93288e705eadef5393a41d7a0900881b4d676e01fd65d5adf15","src/packed/teddy/compile.rs":"5d7de6a45a84bb2322647a6de7a7b1573837b9222b16e348f023b8d47e0a5130","src/packed/teddy/mod.rs":"f63db3419b1d378929bf0bc1f0e3b909ff3c38b9f2b6e86ba4546b8f39907cd3","src/packed/teddy/runtime.rs":"0a1250ea73159b3be6e0fa9a3f55ecedbb2cb90cb798d1709e9f5ee48f8855d5","src/packed/tests.rs":"0b52ab9eef73a1a4f141f475a9fa98e54d447104aa69acba3a7f8248ce7164b2","src/packed/vector.rs":"ab3c0535fca5f09198d58cbfae44c292aeb3ce44bc92bca36d30dc72963639fc","src/prefilter.rs":"f615e929629f9356fb779a4456a0b6b1ee139960029df71d41620bf3fed9282d","src/state_id.rs":"50958ca2b089d775fb4e49a64950e2f1e8a4af1772fe782ae3715a7745dcc6d7","src/tests.rs":"7458d220c78bbc382c1332e0a222f7e47b6b8ff1fac666d46db4c3a9e63cef4c"},"package":"8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada"}

View File

@ -1,56 +0,0 @@
"""
@generated
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
# buildifier: disable=load
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_binary",
"rust_library",
"rust_test",
)
# buildifier: disable=load
load("@bazel_skylib//lib:selects.bzl", "selects")
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"unencumbered", # Unlicense from expression "Unlicense OR MIT"
])
# Generated Targets
rust_library(
name = "aho_corasick",
srcs = glob(["**/*.rs"]),
crate_features = [
"default",
"std",
],
crate_root = "src/lib.rs",
crate_type = "lib",
data = [],
edition = "2015",
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"manual",
],
version = "0.7.10",
# buildifier: leave-alone
deps = [
"//third_party/cargo/vendor/memchr-2.3.3:memchr",
],
)

View File

@ -1,42 +0,0 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "aho-corasick"
version = "0.7.10"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
exclude = ["/aho-corasick-debug", "/ci/*", "/.travis.yml", "/appveyor.yml"]
autotests = false
description = "Fast multiple substring searching."
homepage = "https://github.com/BurntSushi/aho-corasick"
readme = "README.md"
keywords = ["string", "search", "text", "aho", "multi"]
categories = ["text-processing"]
license = "Unlicense/MIT"
repository = "https://github.com/BurntSushi/aho-corasick"
[profile.bench]
debug = true
[profile.release]
debug = true
[lib]
name = "aho_corasick"
[dependencies.memchr]
version = "2.2.0"
default-features = false
[dev-dependencies.doc-comment]
version = "0.3.1"
[features]
default = ["std"]
std = ["memchr/use_std"]

View File

@ -1,186 +0,0 @@
aho-corasick
============
A library for finding occurrences of many patterns at once with SIMD
acceleration in some cases. This library provides multiple pattern
search principally through an implementation of the
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
which builds a finite state machine for executing searches in linear time.
Features include case insensitive matching, overlapping matches and search &
replace in streams.
[![Build status](https://github.com/BurntSushi/aho-corasick/workflows/ci/badge.svg)](https://github.com/BurntSushi/aho-corasick/actions)
[![](http://meritbadge.herokuapp.com/aho-corasick)](https://crates.io/crates/aho-corasick)
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
### Documentation
https://docs.rs/aho-corasick
### Usage
Add this to your `Cargo.toml`:
```toml
[dependencies]
aho-corasick = "0.7"
```
and this to your crate root (if you're using Rust 2015):
```rust
extern crate aho_corasick;
```
### Example: basic searching
This example shows how to search for occurrences of multiple patterns
simultaneously. Each match includes the pattern that matched along with the
byte offsets of the match.
```rust
use aho_corasick::AhoCorasick;
let patterns = &["apple", "maple", "Snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasick::new(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
### Example: case insensitivity
This is like the previous example, but matches `Snapple` case insensitively
using `AhoCorasickBuilder`:
```rust
use aho_corasick::AhoCorasickBuilder;
let patterns = &["apple", "maple", "snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
.build(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
### Example: replacing matches in a stream
This example shows how to execute a search and replace on a stream without
loading the entire stream into memory first.
```rust
use aho_corasick::AhoCorasick;
let patterns = &["fox", "brown", "quick"];
let replace_with = &["sloth", "grey", "slow"];
// In a real example, these might be `std::fs::File`s instead. All you need to
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
let rdr = "The quick brown fox.";
let mut wtr = vec![];
let ac = AhoCorasick::new(patterns);
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
```
### Example: finding the leftmost first match
In the textbook description of Aho-Corasick, its formulation is typically
structured such that it reports all possible matches, even when they overlap
with another. In many cases, overlapping matches may not be desired, such as
the case of finding all successive non-overlapping matches like you might with
a standard regular expression.
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
this doesn't always work in the expected way, since it will report matches as
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
against the text `Samwise`. Most regex engines (that are Perl-like, or
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
algorithm modified for reporting non-overlapping matches will report `Sam`.
A novel contribution of this library is the ability to change the match
semantics of Aho-Corasick (without additional search time overhead) such that
`Samwise` is reported instead. For example, here's the standard approach:
```rust
use aho_corasick::AhoCorasick;
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasick::new(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
```
And now here's the leftmost-first version, which matches how a Perl-like
regex will work:
```rust
use aho_corasick::{AhoCorasickBuilder, MatchKind};
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.build(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
```
In addition to leftmost-first semantics, this library also supports
leftmost-longest semantics, which match the POSIX behavior of a regular
expression alternation. See `MatchKind` in the docs for more details.
### Minimum Rust version policy
This crate's minimum supported `rustc` version is `1.28.0`.
The current policy is that the minimum Rust version required to use this crate
can be increased in minor version updates. For example, if `crate 1.0` requires
Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust
1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum
version of Rust.
In general, this crate will be conservative with respect to the minimum
supported version of Rust.
### Future work
Here are some plans for the future:
* Assuming the current API is sufficient, I'd like to commit to it and release
a `1.0` version of this crate some time in the next 6-12 months.
* Support stream searching with leftmost match semantics. Currently, only
standard match semantics are supported. Getting this right seems possible,
but is tricky since the match state needs to be propagated through multiple
searches. (With standard semantics, as soon as a match is seen the search
ends.)

View File

@ -1,130 +0,0 @@
use std::cmp;
use std::io;
use std::ptr;
/// The default buffer capacity that we use for the stream buffer.
const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB
/// A fairly simple roll buffer for supporting stream searches.
///
/// This buffer acts as a temporary place to store a fixed amount of data when
/// reading from a stream. Its central purpose is to allow "rolling" some
/// suffix of the data to the beginning of the buffer before refilling it with
/// more data from the stream. For example, let's say we are trying to match
/// "foobar" on a stream. When we report the match, we'd like to not only
/// report the correct offsets at which the match occurs, but also the matching
/// bytes themselves. So let's say our stream is a file with the following
/// contents: `test test foobar test test`. Now assume that we happen to read
/// the aforementioned file in two chunks: `test test foo` and `bar test test`.
/// Naively, it would not be possible to report a single contiguous `foobar`
/// match, but this roll buffer allows us to do that. Namely, after the second
/// read, the contents of the buffer should be `st foobar test test`, where the
/// search should ultimately resume immediately after `foo`. (The prefix `st `
/// is included because the roll buffer saves N bytes at the end of the buffer,
/// where N is the maximum possible length of a match.)
///
/// A lot of the logic for dealing with this is unfortunately split out between
/// this roll buffer and the `StreamChunkIter`.
#[derive(Debug)]
pub struct Buffer {
/// The raw buffer contents. This has a fixed size and never increases.
buf: Vec<u8>,
/// The minimum size of the buffer, which is equivalent to the maximum
/// possible length of a match. This corresponds to the amount that we
/// roll the buffer.
min: usize,
/// The end of the contents of this buffer.
end: usize,
}
impl Buffer {
/// Create a new buffer for stream searching. The minimum buffer length
/// given should be the maximum possible length of a match.
pub fn new(min_buffer_len: usize) -> Buffer {
let min = cmp::max(1, min_buffer_len);
// The minimum buffer amount is also the amount that we roll our
// buffer in order to support incremental searching. To this end,
// our actual capacity needs to be at least 1 byte bigger than our
// minimum amount, otherwise we won't have any overlap. In actuality,
// we want our buffer to be a bit bigger than that for performance
// reasons, so we set a lower bound of `8 * min`.
//
// TODO: It would be good to find a way to test the streaming
// implementation with the minimal buffer size.
let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
Buffer { buf: vec![0; capacity], min, end: 0 }
}
/// Return the contents of this buffer.
#[inline]
pub fn buffer(&self) -> &[u8] {
&self.buf[..self.end]
}
/// Return the minimum size of the buffer. The only way a buffer may be
/// smaller than this is if the stream itself contains less than the
/// minimum buffer amount.
#[inline]
pub fn min_buffer_len(&self) -> usize {
self.min
}
/// Return the total length of the contents in the buffer.
#[inline]
pub fn len(&self) -> usize {
self.end
}
/// Return all free capacity in this buffer.
fn free_buffer(&mut self) -> &mut [u8] {
&mut self.buf[self.end..]
}
/// Refill the contents of this buffer by reading as much as possible into
/// this buffer's free capacity. If no more bytes could be read, then this
/// returns false. Otherwise, this reads until it has filled the buffer
/// past the minimum amount.
pub fn fill<R: io::Read>(&mut self, mut rdr: R) -> io::Result<bool> {
let mut readany = false;
loop {
let readlen = rdr.read(self.free_buffer())?;
if readlen == 0 {
return Ok(readany);
}
readany = true;
self.end += readlen;
if self.len() >= self.min {
return Ok(true);
}
}
}
/// Roll the contents of the buffer so that the suffix of this buffer is
/// moved to the front and all other contents are dropped. The size of the
/// suffix corresponds precisely to the minimum buffer length.
///
/// This should only be called when the entire contents of this buffer have
/// been searched.
pub fn roll(&mut self) {
let roll_start = self
.end
.checked_sub(self.min)
.expect("buffer capacity should be bigger than minimum amount");
let roll_len = self.min;
assert!(roll_start + roll_len <= self.end);
unsafe {
// SAFETY: A buffer contains Copy data, so there's no problem
// moving it around. Safety also depends on our indices being in
// bounds, which they always should be, given the assert above.
//
// TODO: Switch to [T]::copy_within once our MSRV is high enough.
ptr::copy(
self.buf[roll_start..].as_ptr(),
self.buf.as_mut_ptr(),
roll_len,
);
}
self.end = roll_len;
}
}

View File

@ -1,709 +0,0 @@
use std::mem::size_of;
use ahocorasick::MatchKind;
use automaton::Automaton;
use classes::ByteClasses;
use error::Result;
use nfa::{PatternID, PatternLength, NFA};
use prefilter::{Prefilter, PrefilterObj, PrefilterState};
use state_id::{dead_id, fail_id, premultiply_overflow_error, StateID};
use Match;
#[derive(Clone, Debug)]
pub enum DFA<S> {
Standard(Standard<S>),
ByteClass(ByteClass<S>),
Premultiplied(Premultiplied<S>),
PremultipliedByteClass(PremultipliedByteClass<S>),
}
impl<S: StateID> DFA<S> {
fn repr(&self) -> &Repr<S> {
match *self {
DFA::Standard(ref dfa) => dfa.repr(),
DFA::ByteClass(ref dfa) => dfa.repr(),
DFA::Premultiplied(ref dfa) => dfa.repr(),
DFA::PremultipliedByteClass(ref dfa) => dfa.repr(),
}
}
pub fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
pub fn heap_bytes(&self) -> usize {
self.repr().heap_bytes
}
pub fn max_pattern_len(&self) -> usize {
self.repr().max_pattern_len
}
pub fn pattern_count(&self) -> usize {
self.repr().pattern_count
}
pub fn start_state(&self) -> S {
self.repr().start_id
}
#[inline(always)]
pub fn overlapping_find_at(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
state_id: &mut S,
match_index: &mut usize,
) -> Option<Match> {
match *self {
DFA::Standard(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::ByteClass(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
}
}
#[inline(always)]
pub fn earliest_find_at(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
state_id: &mut S,
) -> Option<Match> {
match *self {
DFA::Standard(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
DFA::ByteClass(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
DFA::Premultiplied(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
DFA::PremultipliedByteClass(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
}
}
#[inline(always)]
pub fn find_at_no_state(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Option<Match> {
match *self {
DFA::Standard(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
DFA::ByteClass(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
DFA::Premultiplied(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
DFA::PremultipliedByteClass(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
}
}
}
#[derive(Clone, Debug)]
pub struct Standard<S>(Repr<S>);
impl<S: StateID> Standard<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for Standard<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
id.to_usize() < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
self.repr().get_match(id, match_index, end)
}
fn match_count(&self, id: S) -> usize {
self.repr().match_count(id)
}
fn next_state(&self, current: S, input: u8) -> S {
let o = current.to_usize() * 256 + input as usize;
self.repr().trans[o]
}
}
#[derive(Clone, Debug)]
pub struct ByteClass<S>(Repr<S>);
impl<S: StateID> ByteClass<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for ByteClass<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
id.to_usize() < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
self.repr().get_match(id, match_index, end)
}
fn match_count(&self, id: S) -> usize {
self.repr().match_count(id)
}
fn next_state(&self, current: S, input: u8) -> S {
let alphabet_len = self.repr().byte_classes.alphabet_len();
let input = self.repr().byte_classes.get(input);
let o = current.to_usize() * alphabet_len + input as usize;
self.repr().trans[o]
}
}
#[derive(Clone, Debug)]
pub struct Premultiplied<S>(Repr<S>);
impl<S: StateID> Premultiplied<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for Premultiplied<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
(id.to_usize() / 256) < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
if id > self.repr().max_match {
return None;
}
self.repr()
.matches
.get(id.to_usize() / 256)
.and_then(|m| m.get(match_index))
.map(|&(id, len)| Match { pattern: id, len, end })
}
fn match_count(&self, id: S) -> usize {
let o = id.to_usize() / 256;
self.repr().matches[o].len()
}
fn next_state(&self, current: S, input: u8) -> S {
let o = current.to_usize() + input as usize;
self.repr().trans[o]
}
}
#[derive(Clone, Debug)]
pub struct PremultipliedByteClass<S>(Repr<S>);
impl<S: StateID> PremultipliedByteClass<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for PremultipliedByteClass<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
(id.to_usize() / self.repr().alphabet_len()) < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
if id > self.repr().max_match {
return None;
}
self.repr()
.matches
.get(id.to_usize() / self.repr().alphabet_len())
.and_then(|m| m.get(match_index))
.map(|&(id, len)| Match { pattern: id, len, end })
}
fn match_count(&self, id: S) -> usize {
let o = id.to_usize() / self.repr().alphabet_len();
self.repr().matches[o].len()
}
fn next_state(&self, current: S, input: u8) -> S {
let input = self.repr().byte_classes.get(input);
let o = current.to_usize() + input as usize;
self.repr().trans[o]
}
}
#[derive(Clone, Debug)]
pub struct Repr<S> {
match_kind: MatchKind,
anchored: bool,
premultiplied: bool,
start_id: S,
/// The length, in bytes, of the longest pattern in this automaton. This
/// information is useful for keeping correct buffer sizes when searching
/// on streams.
max_pattern_len: usize,
/// The total number of patterns added to this automaton. This includes
/// patterns that may never match.
pattern_count: usize,
state_count: usize,
max_match: S,
/// The number of bytes of heap used by this NFA's transition table.
heap_bytes: usize,
/// A prefilter for quickly detecting candidate matches, if pertinent.
prefilter: Option<PrefilterObj>,
byte_classes: ByteClasses,
trans: Vec<S>,
matches: Vec<Vec<(PatternID, PatternLength)>>,
}
impl<S: StateID> Repr<S> {
/// Returns the total alphabet size for this DFA.
///
/// If byte classes are enabled, then this corresponds to the number of
/// equivalence classes. If they are disabled, then this is always 256.
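/// Bytes in the same class are indistinguishable to the automaton (every
/// state transitions identically on them), so each state's transition row
/// needs one entry per class instead of one per possible byte value.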
fn alphabet_len(&self) -> usize {
self.byte_classes.alphabet_len()
}
/// Returns true only if the given state is a match state.
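///
/// This is just integer comparison (no memory access) because
/// `shuffle_match_states` moves all match states to the low end of the
/// state id range, directly above the fail and dead sentinel states.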
fn is_match_state(&self, id: S) -> bool {
id <= self.max_match && id > dead_id()
}
/// Returns true only if the given state is either a dead state or a match
/// state.
fn is_match_or_dead_state(&self, id: S) -> bool {
id <= self.max_match
}
/// Get the ith match for the given state, where the end position of a
/// match was found at `end`.
///
/// # Panics
///
/// The caller must ensure that the given state identifier is valid,
/// otherwise this may panic. The `match_index` need not be valid. That is,
/// if the given state has no matches then this returns `None`.
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
if id > self.max_match {
return None;
}
self.matches
.get(id.to_usize())
.and_then(|m| m.get(match_index))
.map(|&(id, len)| Match { pattern: id, len, end })
}
/// Return the total number of matches for the given state.
///
/// # Panics
///
/// The caller must ensure that the given identifier is valid, or else
/// this panics.
fn match_count(&self, id: S) -> usize {
self.matches[id.to_usize()].len()
}
/// Get the next state given `from` as the current state and `byte` as the
/// current input byte.
fn next_state(&self, from: S, byte: u8) -> S {
let alphabet_len = self.alphabet_len();
let byte = self.byte_classes.get(byte);
self.trans[from.to_usize() * alphabet_len + byte as usize]
}
/// Set the `byte` transition for the `from` state to point to `to`.
fn set_next_state(&mut self, from: S, byte: u8, to: S) {
let alphabet_len = self.alphabet_len();
let byte = self.byte_classes.get(byte);
self.trans[from.to_usize() * alphabet_len + byte as usize] = to;
}
/// Swap the given states in place.
fn swap_states(&mut self, id1: S, id2: S) {
assert!(!self.premultiplied, "can't swap states in premultiplied DFA");
let o1 = id1.to_usize() * self.alphabet_len();
let o2 = id2.to_usize() * self.alphabet_len();
for b in 0..self.alphabet_len() {
self.trans.swap(o1 + b, o2 + b);
}
self.matches.swap(id1.to_usize(), id2.to_usize());
}
/// This routine shuffles all match states in this DFA to the beginning
/// of the DFA such that every non-match state appears after every match
/// state. (With one exception: the special fail and dead states remain as
/// the first two states.)
///
/// The purpose of doing this shuffling is to avoid an extra conditional
/// in the search loop, and in particular, detecting whether a state is a
/// match or not does not need to access any memory.
///
/// This updates `self.max_match` to point to the last matching state as
/// well as `self.start` if the starting state was moved.
fn shuffle_match_states(&mut self) {
assert!(
!self.premultiplied,
"cannot shuffle match states of premultiplied DFA"
);
if self.state_count <= 1 {
return;
}
let mut first_non_match = self.start_id.to_usize();
while first_non_match < self.state_count
&& self.matches[first_non_match].len() > 0
{
first_non_match += 1;
}
let mut swaps: Vec<S> = vec![fail_id(); self.state_count];
let mut cur = self.state_count - 1;
while cur > first_non_match {
if self.matches[cur].len() > 0 {
self.swap_states(
S::from_usize(cur),
S::from_usize(first_non_match),
);
swaps[cur] = S::from_usize(first_non_match);
swaps[first_non_match] = S::from_usize(cur);
first_non_match += 1;
while first_non_match < cur
&& self.matches[first_non_match].len() > 0
{
first_non_match += 1;
}
}
cur -= 1;
}
for id in (0..self.state_count).map(S::from_usize) {
let alphabet_len = self.alphabet_len();
let offset = id.to_usize() * alphabet_len;
for next in &mut self.trans[offset..offset + alphabet_len] {
if swaps[next.to_usize()] != fail_id() {
*next = swaps[next.to_usize()];
}
}
}
if swaps[self.start_id.to_usize()] != fail_id() {
self.start_id = swaps[self.start_id.to_usize()];
}
self.max_match = S::from_usize(first_non_match - 1);
}
fn premultiply(&mut self) -> Result<()> {
if self.premultiplied || self.state_count <= 1 {
return Ok(());
}
let alpha_len = self.alphabet_len();
premultiply_overflow_error(
S::from_usize(self.state_count - 1),
alpha_len,
)?;
for id in (2..self.state_count).map(S::from_usize) {
let offset = id.to_usize() * alpha_len;
for next in &mut self.trans[offset..offset + alpha_len] {
if *next == dead_id() {
continue;
}
*next = S::from_usize(next.to_usize() * alpha_len);
}
}
self.premultiplied = true;
self.start_id = S::from_usize(self.start_id.to_usize() * alpha_len);
self.max_match = S::from_usize(self.max_match.to_usize() * alpha_len);
Ok(())
}
/// Computes the total amount of heap used by this NFA in bytes.
fn calculate_size(&mut self) {
let mut size = (self.trans.len() * size_of::<S>())
+ (self.matches.len()
* size_of::<Vec<(PatternID, PatternLength)>>());
for state_matches in &self.matches {
size +=
state_matches.len() * size_of::<(PatternID, PatternLength)>();
}
size += self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes());
self.heap_bytes = size;
}
}
/// A builder for configuring the determinization of an NFA into a DFA.
#[derive(Clone, Debug)]
pub struct Builder {
premultiply: bool,
byte_classes: bool,
}
impl Builder {
/// Create a new builder for a DFA.
pub fn new() -> Builder {
Builder { premultiply: true, byte_classes: true }
}
/// Build a DFA from the given NFA.
///
/// This returns an error if the state identifiers exceed their
/// representation size. This can only happen when state ids are
/// premultiplied (which is enabled by default).
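///
/// Determinization resolves every failure transition ahead of time: for
/// each state and input byte, the failure chain is followed (with
/// `nfa_next_state_memoized` reusing DFA rows that were already filled in)
/// until a concrete transition is found, yielding a dense table.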
pub fn build<S: StateID>(&self, nfa: &NFA<S>) -> Result<DFA<S>> {
let byte_classes = if self.byte_classes {
nfa.byte_classes().clone()
} else {
ByteClasses::singletons()
};
let alphabet_len = byte_classes.alphabet_len();
let trans = vec![fail_id(); alphabet_len * nfa.state_len()];
let matches = vec![vec![]; nfa.state_len()];
let mut repr = Repr {
match_kind: nfa.match_kind().clone(),
anchored: nfa.anchored(),
premultiplied: false,
start_id: nfa.start_state(),
max_pattern_len: nfa.max_pattern_len(),
pattern_count: nfa.pattern_count(),
state_count: nfa.state_len(),
max_match: fail_id(),
heap_bytes: 0,
prefilter: nfa.prefilter_obj().map(|p| p.clone()),
byte_classes: byte_classes.clone(),
trans,
matches,
};
for id in (0..nfa.state_len()).map(S::from_usize) {
repr.matches[id.to_usize()].extend_from_slice(nfa.matches(id));
let fail = nfa.failure_transition(id);
nfa.iter_all_transitions(&byte_classes, id, |b, mut next| {
if next == fail_id() {
next = nfa_next_state_memoized(nfa, &repr, id, fail, b);
}
repr.set_next_state(id, b, next);
});
}
repr.shuffle_match_states();
repr.calculate_size();
if self.premultiply {
repr.premultiply()?;
if byte_classes.is_singleton() {
Ok(DFA::Premultiplied(Premultiplied(repr)))
} else {
Ok(DFA::PremultipliedByteClass(PremultipliedByteClass(repr)))
}
} else {
if byte_classes.is_singleton() {
Ok(DFA::Standard(Standard(repr)))
} else {
Ok(DFA::ByteClass(ByteClass(repr)))
}
}
}
/// Whether to use byte classes or not in the DFA.
pub fn byte_classes(&mut self, yes: bool) -> &mut Builder {
self.byte_classes = yes;
self
}
/// Whether to premultiply state identifiers in the DFA.
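///
/// Premultiplying bakes `state * alphabet_len` into each state id, so the
/// search loop indexes the transition table as `current + byte` rather
/// than `current * alphabet_len + byte` (compare the `next_state`
/// implementations above), at the cost of a smaller usable id range.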
pub fn premultiply(&mut self, yes: bool) -> &mut Builder {
self.premultiply = yes;
self
}
}
/// This returns the next NFA transition (including resolving failure
/// transitions), except once it sees a state id less than the id of the DFA
/// state that is currently being populated, then we no longer need to follow
/// failure transitions and can instead query the pre-computed state id from
/// the DFA itself.
///
/// In general, this should only be called when a failure transition is seen.
fn nfa_next_state_memoized<S: StateID>(
nfa: &NFA<S>,
dfa: &Repr<S>,
populating: S,
mut current: S,
input: u8,
) -> S {
loop {
if current < populating {
return dfa.next_state(current, input);
}
let next = nfa.next_state(current, input);
if next != fail_id() {
return next;
}
current = nfa.failure_transition(current);
}
}

View File

@ -1,297 +0,0 @@
/*!
A library for finding occurrences of many patterns at once. This library
provides multiple pattern search principally through an implementation of the
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
which builds a fast finite state machine for executing searches in linear time.
Additionally, this library provides a number of configuration options for
building the automaton that permit controlling the space versus time trade
off. Other features include simple ASCII case insensitive matching, finding
overlapping matches, replacements, searching streams and even searching and
replacing text in streams.
Finally, unlike all other (known) Aho-Corasick implementations, this one
supports enabling
[leftmost-first](enum.MatchKind.html#variant.LeftmostFirst)
or
[leftmost-longest](enum.MatchKind.html#variant.LeftmostLongest)
match semantics, using a (seemingly) novel alternative construction algorithm.
For more details on what match semantics means, see the
[`MatchKind`](enum.MatchKind.html)
type.
# Overview
This section gives a brief overview of the primary types in this crate:
* [`AhoCorasick`](struct.AhoCorasick.html) is the primary type and represents
an Aho-Corasick automaton. This is the type you use to execute searches.
* [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) can be used to build
an Aho-Corasick automaton, and supports configuring a number of options.
* [`Match`](struct.Match.html) represents a single match reported by an
Aho-Corasick automaton. Each match has two pieces of information: the pattern
that matched and the start and end byte offsets corresponding to the position
in the haystack at which it matched.
Additionally, the [`packed`](packed/index.html) sub-module contains a lower
level API for using fast vectorized routines for finding a small number of
patterns in a haystack.
# Example: basic searching
This example shows how to search for occurrences of multiple patterns
simultaneously. Each match includes the pattern that matched along with the
byte offsets of the match.
```
use aho_corasick::AhoCorasick;
let patterns = &["apple", "maple", "Snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasick::new(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
# Example: case insensitivity
This is like the previous example, but matches `Snapple` case insensitively
using `AhoCorasickBuilder`:
```
use aho_corasick::AhoCorasickBuilder;
let patterns = &["apple", "maple", "snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
.build(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
# Example: replacing matches in a stream
This example shows how to execute a search and replace on a stream without
loading the entire stream into memory first.
```
use aho_corasick::AhoCorasick;
# fn example() -> Result<(), ::std::io::Error> {
let patterns = &["fox", "brown", "quick"];
let replace_with = &["sloth", "grey", "slow"];
// In a real example, these might be `std::fs::File`s instead. All you need to
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
let rdr = "The quick brown fox.";
let mut wtr = vec![];
let ac = AhoCorasick::new(patterns);
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
# Ok(()) }; example().unwrap()
```
# Example: finding the leftmost first match
In the textbook description of Aho-Corasick, its formulation is typically
structured such that it reports all possible matches, even when they overlap
with another. In many cases, overlapping matches may not be desired, such as
the case of finding all successive non-overlapping matches like you might with
a standard regular expression.
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
this doesn't always work in the expected way, since it will report matches as
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
against the text `Samwise`. Most regex engines (that are Perl-like, or
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
algorithm modified for reporting non-overlapping matches will report `Sam`.
A novel contribution of this library is the ability to change the match
semantics of Aho-Corasick (without additional search time overhead) such that
`Samwise` is reported instead. For example, here's the standard approach:
```
use aho_corasick::AhoCorasick;
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasick::new(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
```
And now here's the leftmost-first version, which matches how a Perl-like
regex will work:
```
use aho_corasick::{AhoCorasickBuilder, MatchKind};
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.build(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
```
In addition to leftmost-first semantics, this library also supports
leftmost-longest semantics, which match the POSIX behavior of a regular
expression alternation. See
[`MatchKind`](enum.MatchKind.html)
for more details.
# Prefilters
While an Aho-Corasick automaton can perform admirably when compared to more
naive solutions, it is generally slower than more specialized algorithms that
are accelerated using vector instructions such as SIMD.
For that reason, this library will internally use a "prefilter" to attempt
to accelerate searches when possible. Currently, this library has a fairly
limited implementation that only applies when there are 3 or fewer unique
starting bytes among all patterns in an automaton.
While a prefilter is generally good to have on by default since it works well
in the common case, it can lead to less predictable or even sub-optimal
performance in some cases. For that reason, prefilters can be disabled via
[`AhoCorasickBuilder::prefilter`](struct.AhoCorasickBuilder.html#method.prefilter).
*/
#![deny(missing_docs)]
// We can never be truly no_std, but we could be alloc-only some day, so
// require the std feature for now.
#[cfg(not(feature = "std"))]
compile_error!("`std` feature is currently required to build this crate");
extern crate memchr;
#[cfg(test)]
#[macro_use]
extern crate doc_comment;
#[cfg(test)]
doctest!("../README.md");
pub use ahocorasick::{
AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind,
StreamFindIter,
};
pub use error::{Error, ErrorKind};
pub use state_id::StateID;
mod ahocorasick;
mod automaton;
mod buffer;
mod byte_frequencies;
mod classes;
mod dfa;
mod error;
mod nfa;
pub mod packed;
mod prefilter;
mod state_id;
#[cfg(test)]
mod tests;
/// A representation of a match reported by an Aho-Corasick automaton.
///
/// A match has two essential pieces of information: the identifier of the
/// pattern that matched, along with the start and end offsets of the match
/// in the haystack.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use aho_corasick::AhoCorasick;
///
/// let ac = AhoCorasick::new(&[
/// "foo", "bar", "baz",
/// ]);
/// let mat = ac.find("xxx bar xxx").expect("should have a match");
/// assert_eq!(1, mat.pattern());
/// assert_eq!(4, mat.start());
/// assert_eq!(7, mat.end());
/// ```
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Match {
/// The pattern id.
pattern: usize,
/// The length of this match, such that the starting position of the match
/// is `end - len`.
///
/// We use length here because, other than the pattern id, the only
/// information about each pattern that the automaton stores is its length.
/// So using the length here is just a bit more natural. But it isn't
/// technically required.
len: usize,
/// The end offset of the match, exclusive.
end: usize,
}
impl Match {
/// Returns the identifier of the pattern that matched.
///
/// The identifier of a pattern is derived from the position in which it
/// was originally inserted into the corresponding automaton. The first
/// pattern has identifier `0`, and each subsequent pattern is `1`, `2`
/// and so on.
#[inline]
pub fn pattern(&self) -> usize {
self.pattern
}
/// The starting position of the match.
#[inline]
pub fn start(&self) -> usize {
self.end - self.len
}
/// The ending position of the match.
#[inline]
pub fn end(&self) -> usize {
self.end
}
/// Returns true if and only if this match is empty. That is, when
/// `start() == end()`.
///
/// An empty match can only be returned when the empty string was among
/// the patterns used to build the Aho-Corasick automaton.
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}
#[inline]
fn increment(&self, by: usize) -> Match {
Match { pattern: self.pattern, len: self.len, end: self.end + by }
}
#[inline]
fn from_span(id: usize, start: usize, end: usize) -> Match {
Match { pattern: id, len: end - start, end }
}
}
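As a companion to the prefilter notes in the module docs above, a short sketch of opting out via the builder; the patterns and haystack are arbitrary examples:

```rust
use aho_corasick::AhoCorasickBuilder;

fn main() {
    // Disable the prefilter when its heuristics are counterproductive
    // for a particular workload; matching semantics are unaffected.
    let ac = AhoCorasickBuilder::new()
        .prefilter(false)
        .build(&["apple", "maple"]);
    assert!(ac.is_match("a maple tree"));
}
```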

File diff suppressed because it is too large

View File

@@ -0,0 +1 @@
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"cf15db60c9989b860ea874785f1d8b1b5350a28501cd169831042837ee6f350c","DESIGN.md":"9065f33d818d1562244d36dc4781e2a351108030cee17f11c2ba512ca7b4c27e","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"88a0a32dce081f2b1a5c71f2208c155961b0171daf1a5a9a70371fc5cf0c4304","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"00db7f3c3e9c764f7e2e620ee28baeebaf2101191218dd06b75d7656749a096f","src/automaton.rs":"ea3fc2648e026eac9f9969b0d457e49af7b4a40044379ce010d054f22afbc98f","src/buffer.rs":"dae7ee7c1f846ca9cf115ba4949484000e1837b4fb7311f8d8c9a35011c9c26f","src/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/classes.rs":"166c9f15c9a2e370e2bc9a9e1620bb2db13df52edfde9a0db1f20144519a7e72","src/dfa.rs":"8302d809d0acbf2a9d5f947d91e28d779cc547e49256961029f1c01e3375cba9","src/error.rs":"36dbf2cefbfaa8a69186551320dbff023d3e82780a6c925e87c3e3997b967e66","src/lib.rs":"2d2ef0070fb6c4ba6695e948e60e05ed63d3e8c31de28d3c112653798f95d6d3","src/nfa.rs":"3f68abf96a1c556021f0b7011634b3273b3df7e5366c9d27a2fc63e93ffbd95d","src/packed/api.rs":"6c65dfa177b7d7b79f90a048f260bec7f817126c693b85f49704c7d2ecf5f646","src/packed/mod.rs":"29c76ad3cbb1f831140cefac7a27fb504ac4af4f454975a571965b48aad417eb","src/packed/pattern.rs":"b88c57af057997da0a5a06f4c5604a7e598c20acfc11c15cd8977727f6e1cf9c","src/packed/rabinkarp.rs":"b3242a8631ea5607163dcbb641e4ac9c6da26774378da1e51651b0ab5656b390","src/packed/teddy/README.md":"5819f40d221af93288e705eadef5393a41d7a0900881b4d676e01fd65d5adf15","src/packed/teddy/compile.rs":"5d7de6a45a84bb2322647a6de7a7b1573837b9222b16e348f023b8d47e0a5130","src/packed/teddy/mod.rs":"f63db3419b1d378929bf0bc1f0e3b909ff3c38b9f2b6e86ba4546b8f39907cd3","src/packed/teddy/runtime.rs":"0a1250ea73159b3be6e0fa9a3f55ecedbb2cb90cb798d1709e9f5ee48f8855d5","src/packed/tests.rs":"0b52ab9eef73a1a4f141f475a9fa98e54d447104aa69acba3a7f8248ce7164b2","src/packed/vector.rs":"ab3c0535fca5f09198d58cbfae44c292aeb3ce44bc92bca36d30dc72963639fc","src/prefilter.rs":"5191cc8a273a909ca460c492357401cb664a66ed0505948e969786d655a9aed8","src/state_id.rs":"50958ca2b089d775fb4e49a64950e2f1e8a4af1772fe782ae3715a7745dcc6d7","src/tests.rs":"837bf7e7b9aa8b215a750475411730fa081370ee93e78a6f516d86280a1ab906"},"package":"7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"}

View File

@@ -0,0 +1,56 @@
"""
@generated
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
# buildifier: disable=load
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_binary",
"rust_library",
"rust_test",
)
# buildifier: disable=load
load("@bazel_skylib//lib:selects.bzl", "selects")
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"unencumbered", # Unlicense from expression "Unlicense OR MIT"
])
# Generated Targets
rust_library(
name = "aho_corasick",
srcs = glob(["**/*.rs"]),
crate_features = [
"default",
"std",
],
crate_root = "src/lib.rs",
crate_type = "lib",
data = [],
edition = "2015",
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"manual",
],
version = "0.7.15",
# buildifier: leave-alone
deps = [
"//third_party/cargo/vendor/memchr-2.3.4:memchr",
],
)

View File

@@ -0,0 +1,42 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "aho-corasick"
version = "0.7.15"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
exclude = ["/aho-corasick-debug", "/ci/*", "/.travis.yml", "/appveyor.yml"]
autotests = false
description = "Fast multiple substring searching."
homepage = "https://github.com/BurntSushi/aho-corasick"
readme = "README.md"
keywords = ["string", "search", "text", "aho", "multi"]
categories = ["text-processing"]
license = "Unlicense/MIT"
repository = "https://github.com/BurntSushi/aho-corasick"
[profile.bench]
debug = true
[profile.release]
debug = true
[lib]
name = "aho_corasick"
[dependencies.memchr]
version = "2.2.0"
default-features = false
[dev-dependencies.doc-comment]
version = "0.3.1"
[features]
default = ["std"]
std = ["memchr/use_std"]

View File

@@ -0,0 +1,187 @@
aho-corasick
============
A library for finding occurrences of many patterns at once with SIMD
acceleration in some cases. This library provides multiple pattern
search principally through an implementation of the
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
which builds a finite state machine for executing searches in linear time.
Features include case insensitive matching, overlapping matches, fast searching
via SIMD and optional full DFA construction and search & replace in streams.
[![Build status](https://github.com/BurntSushi/aho-corasick/workflows/ci/badge.svg)](https://github.com/BurntSushi/aho-corasick/actions)
[![](http://meritbadge.herokuapp.com/aho-corasick)](https://crates.io/crates/aho-corasick)
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
### Documentation
https://docs.rs/aho-corasick
### Usage
Add this to your `Cargo.toml`:
```toml
[dependencies]
aho-corasick = "0.7"
```
and this to your crate root (if you're using Rust 2015):
```rust
extern crate aho_corasick;
```
### Example: basic searching
This example shows how to search for occurrences of multiple patterns
simultaneously. Each match includes the pattern that matched along with the
byte offsets of the match.
```rust
use aho_corasick::AhoCorasick;
let patterns = &["apple", "maple", "Snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasick::new(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
### Example: case insensitivity
This is like the previous example, but matches `Snapple` case insensitively
using `AhoCorasickBuilder`:
```rust
use aho_corasick::AhoCorasickBuilder;
let patterns = &["apple", "maple", "snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
.build(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
### Example: replacing matches in a stream
This example shows how to execute a search and replace on a stream without
loading the entire stream into memory first.
```rust
use aho_corasick::AhoCorasick;
let patterns = &["fox", "brown", "quick"];
let replace_with = &["sloth", "grey", "slow"];
// In a real example, these might be `std::fs::File`s instead. All you need to
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
let rdr = "The quick brown fox.";
let mut wtr = vec![];
let ac = AhoCorasick::new(patterns);
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)
.expect("stream_replace_all failed");
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
```
### Example: finding the leftmost first match
In the textbook description of Aho-Corasick, its formulation is typically
structured such that it reports all possible matches, even when they overlap
with another. In many cases, overlapping matches may not be desired, such as
the case of finding all successive non-overlapping matches like you might with
a standard regular expression.
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
this doesn't always work in the expected way, since it will report matches as
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
against the text `Samwise`. Most regex engines (that are Perl-like, or
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
algorithm modified for reporting non-overlapping matches will report `Sam`.
A novel contribution of this library is the ability to change the match
semantics of Aho-Corasick (without additional search time overhead) such that
`Samwise` is reported instead. For example, here's the standard approach:
```rust
use aho_corasick::AhoCorasick;
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasick::new(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
```
And now here's the leftmost-first version, which matches how a Perl-like
regex will work:
```rust
use aho_corasick::{AhoCorasickBuilder, MatchKind};
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.build(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
```
In addition to leftmost-first semantics, this library also supports
leftmost-longest semantics, which match the POSIX behavior of a regular
expression alternation. See `MatchKind` in the docs for more details.
### Minimum Rust version policy
This crate's minimum supported `rustc` version is `1.28.0`.
The current policy is that the minimum Rust version required to use this crate
can be increased in minor version updates. For example, if `crate 1.0` requires
Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust
1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum
version of Rust.
In general, this crate will be conservative with respect to the minimum
supported version of Rust.
### Future work
Here are some plans for the future:
* Assuming the current API is sufficient, I'd like to commit to it and release
a `1.0` version of this crate some time in the next 6-12 months.
* Support stream searching with leftmost match semantics. Currently, only
standard match semantics are supported. Getting this right seems possible,
but is tricky since the match state needs to be propagated through multiple
searches. (With standard semantics, as soon as a match is seen the search
ends.)
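The README stops at leftmost-first; for completeness, a sketch of the leftmost-longest (POSIX alternation) variant it mentions, reusing the same toy patterns:

```rust
use aho_corasick::{AhoCorasickBuilder, MatchKind};

fn main() {
    let ac = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostLongest)
        .build(&["Sam", "Samwise"]);
    // The longest leftmost alternative wins, regardless of pattern order.
    let mat = ac.find("Samwise").expect("should have a match");
    assert_eq!("Samwise", &"Samwise"[mat.start()..mat.end()]);
}
```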

View File

@@ -6,7 +6,7 @@ use dfa::{self, DFA};
use error::Result;
use nfa::{self, NFA};
use packed;
use prefilter::PrefilterState;
use prefilter::{Prefilter, PrefilterState};
use state_id::StateID;
use Match;
@@ -502,7 +502,7 @@ impl<S: StateID> AhoCorasick<S> {
/// The closure accepts three parameters: the match found, the text of
/// the match and a string buffer with which to write the replaced text
/// (if any). If the closure returns `true`, then it continues to the next
/// match. If the closure returns false, then searching is stopped.
/// match. If the closure returns `false`, then searching is stopped.
///
/// # Examples
///
@@ -524,6 +524,24 @@ impl<S: StateID> AhoCorasick<S> {
/// });
/// assert_eq!("0 the 2 to the 0age", result);
/// ```
///
/// Stopping the replacement by returning `false` (continued from the
/// example above):
///
/// ```
/// # use aho_corasick::{AhoCorasickBuilder, MatchKind};
/// # let patterns = &["append", "appendage", "app"];
/// # let haystack = "append the app to the appendage";
/// # let ac = AhoCorasickBuilder::new()
/// # .match_kind(MatchKind::LeftmostFirst)
/// # .build(patterns);
/// let mut result = String::new();
/// ac.replace_all_with(haystack, &mut result, |mat, _, dst| {
/// dst.push_str(&mat.pattern().to_string());
/// mat.pattern() != 2
/// });
/// assert_eq!("0 the 2 to the appendage", result);
/// ```
pub fn replace_all_with<F>(
&self,
haystack: &str,
@@ -536,7 +554,9 @@ impl<S: StateID> AhoCorasick<S> {
for mat in self.find_iter(haystack) {
dst.push_str(&haystack[last_match..mat.start()]);
last_match = mat.end();
replace_with(&mat, &haystack[mat.start()..mat.end()], dst);
if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) {
break;
};
}
dst.push_str(&haystack[last_match..]);
}
@@ -548,7 +568,7 @@ impl<S: StateID> AhoCorasick<S> {
/// The closure accepts three parameters: the match found, the text of
/// the match and a byte buffer with which to write the replaced text
/// (if any). If the closure returns `true`, then it continues to the next
/// match. If the closure returns false, then searching is stopped.
/// match. If the closure returns `false`, then searching is stopped.
///
/// # Examples
///
@@ -570,6 +590,24 @@ impl<S: StateID> AhoCorasick<S> {
/// });
/// assert_eq!(b"0 the 2 to the 0age".to_vec(), result);
/// ```
///
/// Stopping the replacement by returning `false` (continued from the
/// example above):
///
/// ```
/// # use aho_corasick::{AhoCorasickBuilder, MatchKind};
/// # let patterns = &["append", "appendage", "app"];
/// # let haystack = b"append the app to the appendage";
/// # let ac = AhoCorasickBuilder::new()
/// # .match_kind(MatchKind::LeftmostFirst)
/// # .build(patterns);
/// let mut result = vec![];
/// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| {
/// dst.extend(mat.pattern().to_string().bytes());
/// mat.pattern() != 2
/// });
/// assert_eq!(b"0 the 2 to the appendage".to_vec(), result);
/// ```
pub fn replace_all_with_bytes<F>(
&self,
haystack: &[u8],
@@ -582,7 +620,9 @@ impl<S: StateID> AhoCorasick<S> {
for mat in self.find_iter(haystack) {
dst.extend(&haystack[last_match..mat.start()]);
last_match = mat.end();
replace_with(&mat, &haystack[mat.start()..mat.end()], dst);
if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) {
break;
};
}
dst.extend(&haystack[last_match..]);
}
@@ -735,9 +775,7 @@ impl<S: StateID> AhoCorasick<S> {
/// [`find_iter`](struct.AhoCorasick.html#method.find_iter).
///
/// The closure accepts three parameters: the match found, the text of
/// the match and the writer with which to write the replaced text
/// (if any). If the closure returns `true`, then it continues to the next
/// match. If the closure returns false, then searching is stopped.
/// the match and the writer with which to write the replaced text (if any).
///
/// After all matches are replaced, the writer is _not_ flushed.
///
@@ -1037,6 +1075,24 @@ impl<S: StateID> Imp<S> {
}
}
/// Returns the prefilter object, if one exists, for the underlying
/// automaton.
fn prefilter(&self) -> Option<&dyn Prefilter> {
match *self {
Imp::NFA(ref nfa) => nfa.prefilter(),
Imp::DFA(ref dfa) => dfa.prefilter(),
}
}
/// Returns true if and only if we should attempt to use a prefilter.
fn use_prefilter(&self) -> bool {
let p = match self.prefilter() {
None => return false,
Some(p) => p,
};
!p.looks_for_non_start_of_match()
}
#[inline(always)]
fn overlapping_find_at(
&self,
@@ -1325,7 +1381,11 @@ impl<'a, R: io::Read, S: StateID> StreamChunkIter<'a, R, S> {
"stream searching is only supported for Standard match semantics"
);
let prestate = PrefilterState::new(ac.max_pattern_len());
let prestate = if ac.imp.use_prefilter() {
PrefilterState::new(ac.max_pattern_len())
} else {
PrefilterState::disabled()
};
let buf = Buffer::new(ac.imp.max_pattern_len());
let state_id = ac.imp.start_state();
StreamChunkIter {
@@ -1809,7 +1869,7 @@ impl AhoCorasickBuilder {
/// finite automaton (NFA) is used instead.
///
/// The main benefit to a DFA is that it can execute searches more quickly
/// than a DFA (perhaps 2-4 times as fast). The main drawback is that the
/// than an NFA (perhaps 2-4 times as fast). The main drawback is that the
/// DFA uses more space and can take much longer to build.
///
/// Enabling this option does not change the time complexity for
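A sketch of the builder knobs this hunk documents; the pattern set is an arbitrary example, and `byte_classes`/`premultiply` are spelled out even though they default to enabled:

```rust
use aho_corasick::AhoCorasickBuilder;

fn main() {
    // Trade memory and build time for raw search speed with a full DFA.
    let ac = AhoCorasickBuilder::new()
        .dfa(true)
        .byte_classes(true)
        .premultiply(true)
        .build(&["Samwise", "Sam"]);
    // Standard semantics report the earliest match seen: "Sam".
    let mat = ac.find("Samwise").expect("should have a match");
    assert_eq!((mat.start(), mat.end()), (0, 3));
}
```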

View File

@@ -0,0 +1,132 @@
use std::cmp;
use std::io;
use std::ptr;
/// The default buffer capacity that we use for the stream buffer.
const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB
/// A fairly simple roll buffer for supporting stream searches.
///
/// This buffer acts as a temporary place to store a fixed amount of data when
/// reading from a stream. Its central purpose is to allow "rolling" some
/// suffix of the data to the beginning of the buffer before refilling it with
/// more data from the stream. For example, let's say we are trying to match
/// "foobar" on a stream. When we report the match, we'd like to not only
/// report the correct offsets at which the match occurs, but also the matching
/// bytes themselves. So let's say our stream is a file with the following
/// contents: `test test foobar test test`. Now assume that we happen to read
/// the aforementioned file in two chunks: `test test foo` and `bar test test`.
/// Naively, it would not be possible to report a single contiguous `foobar`
/// match, but this roll buffer allows us to do that. Namely, after the second
/// read, the contents of the buffer should be `st foobar test test`, where the
/// search should ultimately resume immediately after `foo`. (The prefix `st `
/// is included because the roll buffer saves N bytes at the end of the buffer,
/// where N is the maximum possible length of a match.)
///
/// A lot of the logic for dealing with this is unfortunately split out between
/// this roll buffer and the `StreamChunkIter`.
#[derive(Debug)]
pub struct Buffer {
/// The raw buffer contents. This has a fixed size and never increases.
buf: Vec<u8>,
/// The minimum size of the buffer, which is equivalent to the maximum
/// possible length of a match. This corresponds to the amount that we
/// roll on each refill.
min: usize,
/// The end of the contents of this buffer.
end: usize,
}
impl Buffer {
/// Create a new buffer for stream searching. The minimum buffer length
/// given should be the size of the maximum possible match length.
pub fn new(min_buffer_len: usize) -> Buffer {
let min = cmp::max(1, min_buffer_len);
// The minimum buffer amount is also the amount that we roll our
// buffer in order to support incremental searching. To this end,
// our actual capacity needs to be at least 1 byte bigger than our
// minimum amount, otherwise we won't have any overlap. In actuality,
// we want our buffer to be a bit bigger than that for performance
// reasons, so we set a lower bound of `8 * min`.
//
// TODO: It would be good to find a way to test the streaming
// implementation with the minimal buffer size. For now, we just
// uncomment the next line and comment out the subsequent line.
// let capacity = 1 + min;
let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
Buffer { buf: vec![0; capacity], min, end: 0 }
}
/// Return the contents of this buffer.
#[inline]
pub fn buffer(&self) -> &[u8] {
&self.buf[..self.end]
}
/// Return the minimum size of the buffer. The only way a buffer may be
/// smaller than this is if the stream itself contains less than the
/// minimum buffer amount.
#[inline]
pub fn min_buffer_len(&self) -> usize {
self.min
}
/// Return the total length of the contents in the buffer.
#[inline]
pub fn len(&self) -> usize {
self.end
}
/// Return all free capacity in this buffer.
fn free_buffer(&mut self) -> &mut [u8] {
&mut self.buf[self.end..]
}
/// Refill the contents of this buffer by reading as much as possible into
/// this buffer's free capacity. If no more bytes could be read, then this
/// returns false. Otherwise, this reads until it has filled the buffer
/// past the minimum amount.
pub fn fill<R: io::Read>(&mut self, mut rdr: R) -> io::Result<bool> {
let mut readany = false;
loop {
let readlen = rdr.read(self.free_buffer())?;
if readlen == 0 {
return Ok(readany);
}
readany = true;
self.end += readlen;
if self.len() >= self.min {
return Ok(true);
}
}
}
/// Roll the contents of the buffer so that the suffix of this buffer is
/// moved to the front and all other contents are dropped. The size of the
/// suffix corresponds precisely to the minimum buffer length.
///
/// This should only be called when the entire contents of this buffer have
/// been searched.
pub fn roll(&mut self) {
let roll_start = self
.end
.checked_sub(self.min)
.expect("buffer capacity should be bigger than minimum amount");
let roll_len = self.min;
assert!(roll_start + roll_len <= self.end);
unsafe {
// SAFETY: A buffer contains Copy data, so there's no problem
// moving it around. Safety also depends on our indices being in
// bounds, which they always should be, given the assert above.
//
// TODO: Switch to [T]::copy_within once our MSRV is high enough.
ptr::copy(
self.buf[roll_start..].as_ptr(),
self.buf.as_mut_ptr(),
roll_len,
);
}
self.end = roll_len;
}
}
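A minimal sketch of the roll operation on a plain `Vec<u8>`, reusing the doc comment's own "foobar" example (the sizes and chunk boundaries are assumptions of the sketch):

```rust
fn main() {
    // Longest pattern is "foobar" (6 bytes); the stream arrives in two
    // chunks, split in the middle of a match.
    let min = 6;
    let mut buf: Vec<u8> = b"test test foo".to_vec();
    // Roll: move the last `min` bytes to the front of the buffer...
    let start = buf.len() - min;
    buf.copy_within(start.., 0);
    buf.truncate(min);
    // ...then refill with the next chunk from the stream.
    buf.extend_from_slice(b"bar test test");
    assert_eq!(&buf[..], &b"st foobar test test"[..]);
}
```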

View File

@@ -0,0 +1,713 @@
use std::mem::size_of;
use ahocorasick::MatchKind;
use automaton::Automaton;
use classes::ByteClasses;
use error::Result;
use nfa::{PatternID, PatternLength, NFA};
use prefilter::{Prefilter, PrefilterObj, PrefilterState};
use state_id::{dead_id, fail_id, premultiply_overflow_error, StateID};
use Match;
#[derive(Clone, Debug)]
pub enum DFA<S> {
Standard(Standard<S>),
ByteClass(ByteClass<S>),
Premultiplied(Premultiplied<S>),
PremultipliedByteClass(PremultipliedByteClass<S>),
}
impl<S: StateID> DFA<S> {
fn repr(&self) -> &Repr<S> {
match *self {
DFA::Standard(ref dfa) => dfa.repr(),
DFA::ByteClass(ref dfa) => dfa.repr(),
DFA::Premultiplied(ref dfa) => dfa.repr(),
DFA::PremultipliedByteClass(ref dfa) => dfa.repr(),
}
}
pub fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
pub fn heap_bytes(&self) -> usize {
self.repr().heap_bytes
}
pub fn max_pattern_len(&self) -> usize {
self.repr().max_pattern_len
}
pub fn pattern_count(&self) -> usize {
self.repr().pattern_count
}
pub fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
pub fn start_state(&self) -> S {
self.repr().start_id
}
#[inline(always)]
pub fn overlapping_find_at(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
state_id: &mut S,
match_index: &mut usize,
) -> Option<Match> {
match *self {
DFA::Standard(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::ByteClass(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
}
}
#[inline(always)]
pub fn earliest_find_at(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
state_id: &mut S,
) -> Option<Match> {
match *self {
DFA::Standard(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
DFA::ByteClass(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
DFA::Premultiplied(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
DFA::PremultipliedByteClass(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
}
}
#[inline(always)]
pub fn find_at_no_state(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Option<Match> {
match *self {
DFA::Standard(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
DFA::ByteClass(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
DFA::Premultiplied(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
DFA::PremultipliedByteClass(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
}
}
}
#[derive(Clone, Debug)]
pub struct Standard<S>(Repr<S>);
impl<S: StateID> Standard<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for Standard<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
id.to_usize() < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
self.repr().get_match(id, match_index, end)
}
fn match_count(&self, id: S) -> usize {
self.repr().match_count(id)
}
fn next_state(&self, current: S, input: u8) -> S {
let o = current.to_usize() * 256 + input as usize;
self.repr().trans[o]
}
}
#[derive(Clone, Debug)]
pub struct ByteClass<S>(Repr<S>);
impl<S: StateID> ByteClass<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for ByteClass<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
id.to_usize() < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
self.repr().get_match(id, match_index, end)
}
fn match_count(&self, id: S) -> usize {
self.repr().match_count(id)
}
fn next_state(&self, current: S, input: u8) -> S {
let alphabet_len = self.repr().byte_classes.alphabet_len();
let input = self.repr().byte_classes.get(input);
let o = current.to_usize() * alphabet_len + input as usize;
self.repr().trans[o]
}
}
#[derive(Clone, Debug)]
pub struct Premultiplied<S>(Repr<S>);
impl<S: StateID> Premultiplied<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for Premultiplied<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
(id.to_usize() / 256) < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
if id > self.repr().max_match {
return None;
}
self.repr()
.matches
.get(id.to_usize() / 256)
.and_then(|m| m.get(match_index))
.map(|&(id, len)| Match { pattern: id, len, end })
}
fn match_count(&self, id: S) -> usize {
let o = id.to_usize() / 256;
self.repr().matches[o].len()
}
fn next_state(&self, current: S, input: u8) -> S {
let o = current.to_usize() + input as usize;
self.repr().trans[o]
}
}
#[derive(Clone, Debug)]
pub struct PremultipliedByteClass<S>(Repr<S>);
impl<S: StateID> PremultipliedByteClass<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for PremultipliedByteClass<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
(id.to_usize() / self.repr().alphabet_len()) < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
if id > self.repr().max_match {
return None;
}
self.repr()
.matches
.get(id.to_usize() / self.repr().alphabet_len())
.and_then(|m| m.get(match_index))
.map(|&(id, len)| Match { pattern: id, len, end })
}
fn match_count(&self, id: S) -> usize {
let o = id.to_usize() / self.repr().alphabet_len();
self.repr().matches[o].len()
}
fn next_state(&self, current: S, input: u8) -> S {
let input = self.repr().byte_classes.get(input);
let o = current.to_usize() + input as usize;
self.repr().trans[o]
}
}
#[derive(Clone, Debug)]
pub struct Repr<S> {
match_kind: MatchKind,
anchored: bool,
premultiplied: bool,
start_id: S,
/// The length, in bytes, of the longest pattern in this automaton. This
/// information is useful for keeping correct buffer sizes when searching
/// on streams.
max_pattern_len: usize,
/// The total number of patterns added to this automaton. This includes
/// patterns that may never match.
pattern_count: usize,
state_count: usize,
max_match: S,
/// The number of bytes of heap used by this DFA's transition table.
heap_bytes: usize,
/// A prefilter for quickly detecting candidate matches, if pertinent.
prefilter: Option<PrefilterObj>,
byte_classes: ByteClasses,
trans: Vec<S>,
matches: Vec<Vec<(PatternID, PatternLength)>>,
}
impl<S: StateID> Repr<S> {
/// Returns the total alphabet size for this DFA.
///
/// If byte classes are enabled, then this corresponds to the number of
/// equivalence classes. If they are disabled, then this is always 256.
fn alphabet_len(&self) -> usize {
self.byte_classes.alphabet_len()
}
/// Returns true only if the given state is a match state.
fn is_match_state(&self, id: S) -> bool {
id <= self.max_match && id > dead_id()
}
/// Returns true only if the given state is either a dead state or a match
/// state.
fn is_match_or_dead_state(&self, id: S) -> bool {
id <= self.max_match
}
/// Get the ith match for the given state, where the end position of a
/// match was found at `end`.
///
/// # Panics
///
/// The caller must ensure that the given state identifier is valid,
/// otherwise this may panic. The `match_index` need not be valid. That is,
/// if the given state has no matches then this returns `None`.
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
if id > self.max_match {
return None;
}
self.matches
.get(id.to_usize())
.and_then(|m| m.get(match_index))
.map(|&(id, len)| Match { pattern: id, len, end })
}
/// Return the total number of matches for the given state.
///
/// # Panics
///
/// The caller must ensure that the given identifier is valid, or else
/// this panics.
fn match_count(&self, id: S) -> usize {
self.matches[id.to_usize()].len()
}
/// Get the next state given `from` as the current state and `byte` as the
/// current input byte.
fn next_state(&self, from: S, byte: u8) -> S {
let alphabet_len = self.alphabet_len();
let byte = self.byte_classes.get(byte);
self.trans[from.to_usize() * alphabet_len + byte as usize]
}
/// Set the `byte` transition for the `from` state to point to `to`.
fn set_next_state(&mut self, from: S, byte: u8, to: S) {
let alphabet_len = self.alphabet_len();
let byte = self.byte_classes.get(byte);
self.trans[from.to_usize() * alphabet_len + byte as usize] = to;
}
/// Swap the given states in place.
fn swap_states(&mut self, id1: S, id2: S) {
assert!(!self.premultiplied, "can't swap states in premultiplied DFA");
let o1 = id1.to_usize() * self.alphabet_len();
let o2 = id2.to_usize() * self.alphabet_len();
for b in 0..self.alphabet_len() {
self.trans.swap(o1 + b, o2 + b);
}
self.matches.swap(id1.to_usize(), id2.to_usize());
}
/// This routine shuffles all match states in this DFA to the beginning
/// of the DFA such that every non-match state appears after every match
/// state. (With one exception: the special fail and dead states remain as
/// the first two states.)
///
/// The purpose of doing this shuffling is to avoid an extra conditional
/// in the search loop, and in particular, detecting whether a state is a
/// match or not does not need to access any memory.
///
/// This updates `self.max_match` to point to the last matching state as
/// well as `self.start` if the starting state was moved.
fn shuffle_match_states(&mut self) {
assert!(
!self.premultiplied,
"cannot shuffle match states of premultiplied DFA"
);
if self.state_count <= 1 {
return;
}
let mut first_non_match = self.start_id.to_usize();
while first_non_match < self.state_count
&& self.matches[first_non_match].len() > 0
{
first_non_match += 1;
}
let mut swaps: Vec<S> = vec![fail_id(); self.state_count];
let mut cur = self.state_count - 1;
while cur > first_non_match {
if self.matches[cur].len() > 0 {
self.swap_states(
S::from_usize(cur),
S::from_usize(first_non_match),
);
swaps[cur] = S::from_usize(first_non_match);
swaps[first_non_match] = S::from_usize(cur);
first_non_match += 1;
while first_non_match < cur
&& self.matches[first_non_match].len() > 0
{
first_non_match += 1;
}
}
cur -= 1;
}
for id in (0..self.state_count).map(S::from_usize) {
let alphabet_len = self.alphabet_len();
let offset = id.to_usize() * alphabet_len;
for next in &mut self.trans[offset..offset + alphabet_len] {
if swaps[next.to_usize()] != fail_id() {
*next = swaps[next.to_usize()];
}
}
}
if swaps[self.start_id.to_usize()] != fail_id() {
self.start_id = swaps[self.start_id.to_usize()];
}
self.max_match = S::from_usize(first_non_match - 1);
}
fn premultiply(&mut self) -> Result<()> {
if self.premultiplied || self.state_count <= 1 {
return Ok(());
}
let alpha_len = self.alphabet_len();
premultiply_overflow_error(
S::from_usize(self.state_count - 1),
alpha_len,
)?;
for id in (2..self.state_count).map(S::from_usize) {
let offset = id.to_usize() * alpha_len;
for next in &mut self.trans[offset..offset + alpha_len] {
if *next == dead_id() {
continue;
}
*next = S::from_usize(next.to_usize() * alpha_len);
}
}
self.premultiplied = true;
self.start_id = S::from_usize(self.start_id.to_usize() * alpha_len);
self.max_match = S::from_usize(self.max_match.to_usize() * alpha_len);
Ok(())
}
/// Computes the total amount of heap used by this DFA in bytes.
fn calculate_size(&mut self) {
let mut size = (self.trans.len() * size_of::<S>())
+ (self.matches.len()
* size_of::<Vec<(PatternID, PatternLength)>>());
for state_matches in &self.matches {
size +=
state_matches.len() * size_of::<(PatternID, PatternLength)>();
}
size += self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes());
self.heap_bytes = size;
}
}
/// A builder for configuring the determinization of an NFA into a DFA.
#[derive(Clone, Debug)]
pub struct Builder {
premultiply: bool,
byte_classes: bool,
}
impl Builder {
/// Create a new builder for a DFA.
pub fn new() -> Builder {
Builder { premultiply: true, byte_classes: true }
}
/// Build a DFA from the given NFA.
///
/// This returns an error if the state identifiers exceed their
/// representation size. This can only happen when state ids are
/// premultiplied (which is enabled by default).
pub fn build<S: StateID>(&self, nfa: &NFA<S>) -> Result<DFA<S>> {
let byte_classes = if self.byte_classes {
nfa.byte_classes().clone()
} else {
ByteClasses::singletons()
};
let alphabet_len = byte_classes.alphabet_len();
let trans = vec![fail_id(); alphabet_len * nfa.state_len()];
let matches = vec![vec![]; nfa.state_len()];
let mut repr = Repr {
match_kind: nfa.match_kind().clone(),
anchored: nfa.anchored(),
premultiplied: false,
start_id: nfa.start_state(),
max_pattern_len: nfa.max_pattern_len(),
pattern_count: nfa.pattern_count(),
state_count: nfa.state_len(),
max_match: fail_id(),
heap_bytes: 0,
prefilter: nfa.prefilter_obj().map(|p| p.clone()),
byte_classes: byte_classes.clone(),
trans,
matches,
};
for id in (0..nfa.state_len()).map(S::from_usize) {
repr.matches[id.to_usize()].extend_from_slice(nfa.matches(id));
let fail = nfa.failure_transition(id);
nfa.iter_all_transitions(&byte_classes, id, |b, mut next| {
if next == fail_id() {
next = nfa_next_state_memoized(nfa, &repr, id, fail, b);
}
repr.set_next_state(id, b, next);
});
}
repr.shuffle_match_states();
repr.calculate_size();
if self.premultiply {
repr.premultiply()?;
if byte_classes.is_singleton() {
Ok(DFA::Premultiplied(Premultiplied(repr)))
} else {
Ok(DFA::PremultipliedByteClass(PremultipliedByteClass(repr)))
}
} else {
if byte_classes.is_singleton() {
Ok(DFA::Standard(Standard(repr)))
} else {
Ok(DFA::ByteClass(ByteClass(repr)))
}
}
}
/// Whether to use byte classes or not in the DFA.
pub fn byte_classes(&mut self, yes: bool) -> &mut Builder {
self.byte_classes = yes;
self
}
/// Whether to premultiply state identifiers in the DFA.
pub fn premultiply(&mut self, yes: bool) -> &mut Builder {
self.premultiply = yes;
self
}
}
/// This returns the next NFA transition (including resolving failure
/// transitions). Once it sees a state id less than the id of the DFA state
/// currently being populated, it no longer needs to follow failure
/// transitions and can instead query the pre-computed state id from the DFA
/// itself.
///
/// In general, this should only be called when a failure transition is seen.
fn nfa_next_state_memoized<S: StateID>(
nfa: &NFA<S>,
dfa: &Repr<S>,
populating: S,
mut current: S,
input: u8,
) -> S {
loop {
if current < populating {
return dfa.next_state(current, input);
}
let next = nfa.next_state(current, input);
if next != fail_id() {
return next;
}
current = nfa.failure_transition(current);
}
}
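The arithmetic behind `premultiply` in the file above, as a standalone sketch (the concrete numbers are arbitrary):

```rust
fn main() {
    // With byte classes, the alphabet might be, say, 4 symbols wide.
    let alphabet_len = 4;
    let (state, byte_class) = (3usize, 2usize);
    // Premultiplying scales every state id by the alphabet length once,
    // so each transition lookup is a single add instead of a multiply-add.
    let pre = state * alphabet_len;
    assert_eq!(state * alphabet_len + byte_class, pre + byte_class);
}
```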

View File

@@ -0,0 +1,298 @@
/*!
A library for finding occurrences of many patterns at once. This library
provides multiple pattern search principally through an implementation of the
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
which builds a fast finite state machine for executing searches in linear time.
Additionally, this library provides a number of configuration options for
building the automaton that permit controlling the space versus time trade
off. Other features include simple ASCII case insensitive matching, finding
overlapping matches, replacements, searching streams and even searching and
replacing text in streams.
Finally, unlike all other (known) Aho-Corasick implementations, this one
supports enabling
[leftmost-first](enum.MatchKind.html#variant.LeftmostFirst)
or
[leftmost-longest](enum.MatchKind.html#variant.LeftmostLongest)
match semantics, using a (seemingly) novel alternative construction algorithm.
For more details on what match semantics means, see the
[`MatchKind`](enum.MatchKind.html)
type.
# Overview
This section gives a brief overview of the primary types in this crate:
* [`AhoCorasick`](struct.AhoCorasick.html) is the primary type and represents
an Aho-Corasick automaton. This is the type you use to execute searches.
* [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) can be used to build
an Aho-Corasick automaton, and supports configuring a number of options.
* [`Match`](struct.Match.html) represents a single match reported by an
Aho-Corasick automaton. Each match has two pieces of information: the pattern
that matched and the start and end byte offsets corresponding to the position
in the haystack at which it matched.
Additionally, the [`packed`](packed/index.html) sub-module contains a lower
level API for using fast vectorized routines for finding a small number of
patterns in a haystack.
# Example: basic searching
This example shows how to search for occurrences of multiple patterns
simultaneously. Each match includes the pattern that matched along with the
byte offsets of the match.
```
use aho_corasick::AhoCorasick;
let patterns = &["apple", "maple", "Snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasick::new(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
# Example: case insensitivity
This is like the previous example, but matches `Snapple` case insensitively
using `AhoCorasickBuilder`:
```
use aho_corasick::AhoCorasickBuilder;
let patterns = &["apple", "maple", "snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
.build(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
# Example: replacing matches in a stream
This example shows how to execute a search and replace on a stream without
loading the entire stream into memory first.
```
use aho_corasick::AhoCorasick;
# fn example() -> Result<(), ::std::io::Error> {
let patterns = &["fox", "brown", "quick"];
let replace_with = &["sloth", "grey", "slow"];
// In a real example, these might be `std::fs::File`s instead. All you need to
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
let rdr = "The quick brown fox.";
let mut wtr = vec![];
let ac = AhoCorasick::new(patterns);
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
# Ok(()) }; example().unwrap()
```
# Example: finding the leftmost first match
In the textbook description of Aho-Corasick, its formulation is typically
structured such that it reports all possible matches, even when they overlap
with another. In many cases, overlapping matches may not be desired, such as
the case of finding all successive non-overlapping matches like you might with
a standard regular expression.
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
this doesn't always work in the expected way, since it will report matches as
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
against the text `Samwise`. Most regex engines (that are Perl-like, or
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
algorithm modified for reporting non-overlapping matches will report `Sam`.
A novel contribution of this library is the ability to change the match
semantics of Aho-Corasick (without additional search time overhead) such that
`Samwise` is reported instead. For example, here's the standard approach:
```
use aho_corasick::AhoCorasick;
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasick::new(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
```
And now here's the leftmost-first version, which matches how a Perl-like
regex will work:
```
use aho_corasick::{AhoCorasickBuilder, MatchKind};
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.build(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
```
In addition to leftmost-first semantics, this library also supports
leftmost-longest semantics, which match the POSIX behavior of a regular
expression alternation. See
[`MatchKind`](enum.MatchKind.html)
for more details.
# Prefilters
While an Aho-Corasick automaton can perform admirably when compared to more
naive solutions, it is generally slower than more specialized algorithms that
are accelerated using vector instructions such as SIMD.
For that reason, this library will internally use a "prefilter" to attempt
to accelerate searches when possible. Currently, this library has several
different algorithms it might use depending on the patterns provided. Once the
number of patterns gets too big, prefilters are no longer used.
While a prefilter is generally good to have on by default since it works
well in the common case, it can lead to less predictable or even sub-optimal
performance in some cases. For that reason, prefilters can be explicitly
disabled via
[`AhoCorasickBuilder::prefilter`](struct.AhoCorasickBuilder.html#method.prefilter).
*/
#![deny(missing_docs)]
// We can never be truly no_std, but we could be alloc-only some day, so
// require the std feature for now.
#[cfg(not(feature = "std"))]
compile_error!("`std` feature is currently required to build this crate");
extern crate memchr;
// #[cfg(doctest)]
// #[macro_use]
// extern crate doc_comment;
// #[cfg(doctest)]
// doctest!("../README.md");
pub use ahocorasick::{
AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind,
StreamFindIter,
};
pub use error::{Error, ErrorKind};
pub use state_id::StateID;
mod ahocorasick;
mod automaton;
mod buffer;
mod byte_frequencies;
mod classes;
mod dfa;
mod error;
mod nfa;
pub mod packed;
mod prefilter;
mod state_id;
#[cfg(test)]
mod tests;
/// A representation of a match reported by an Aho-Corasick automaton.
///
/// A match has two essential pieces of information: the identifier of the
/// pattern that matched, along with the start and end offsets of the match
/// in the haystack.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use aho_corasick::AhoCorasick;
///
/// let ac = AhoCorasick::new(&[
/// "foo", "bar", "baz",
/// ]);
/// let mat = ac.find("xxx bar xxx").expect("should have a match");
/// assert_eq!(1, mat.pattern());
/// assert_eq!(4, mat.start());
/// assert_eq!(7, mat.end());
/// ```
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Match {
/// The pattern id.
pattern: usize,
/// The length of this match, such that the starting position of the match
/// is `end - len`.
///
/// We use length here because, other than the pattern id, the only
/// information about each pattern that the automaton stores is its length.
/// So using the length here is just a bit more natural. But it isn't
/// technically required.
len: usize,
/// The end offset of the match, exclusive.
end: usize,
}
impl Match {
/// Returns the identifier of the pattern that matched.
///
/// The identifier of a pattern is derived from the position in which it
/// was originally inserted into the corresponding automaton. The first
/// pattern has identifier `0`, and each subsequent pattern is `1`, `2`
/// and so on.
#[inline]
pub fn pattern(&self) -> usize {
self.pattern
}
/// The starting position of the match.
#[inline]
pub fn start(&self) -> usize {
self.end - self.len
}
/// The ending position of the match.
#[inline]
pub fn end(&self) -> usize {
self.end
}
/// Returns true if and only if this match is empty. That is, when
/// `start() == end()`.
///
/// An empty match can only be returned when the empty string was among
/// the patterns used to build the Aho-Corasick automaton.
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}
#[inline]
fn increment(&self, by: usize) -> Match {
Match { pattern: self.pattern, len: self.len, end: self.end + by }
}
#[inline]
fn from_span(id: usize, start: usize, end: usize) -> Match {
Match { pattern: id, len: end - start, end }
}
}

View File

@@ -858,10 +858,17 @@ impl<'a, S: StateID> Compiler<'a, S> {
while let Some(id) = queue.pop_front() {
let mut it = self.nfa.iter_transitions_mut(id);
while let Some((b, next)) = it.next() {
if !seen.contains(next) {
queue.push_back(next);
seen.insert(next);
if seen.contains(next) {
// The only way to visit a duplicate state in a transition
// list is when ASCII case insensitivity is enabled. In
// this case, we want to skip it since it's redundant work.
// But it would also end up duplicating matches, which
// results in reporting duplicate matches in some cases.
// See the 'acasei010' regression test.
continue;
}
queue.push_back(next);
seen.insert(next);
let mut fail = it.nfa().state(id).fail;
while it.nfa().state(fail).next_state(b) == fail_id() {
@@ -1012,10 +1019,17 @@ impl<'a, S: StateID> Compiler<'a, S> {
// Queue up the next state.
let next = item.next_queued_state(it.nfa(), next_id);
if !seen.contains(next.id) {
queue.push_back(next);
seen.insert(next.id);
if seen.contains(next.id) {
// The only way to visit a duplicate state in a transition
// list is when ASCII case insensitivity is enabled. In
// this case, we want to skip it since it's redundant work.
// But it would also end up duplicating matches, which
// results in reporting duplicate matches in some cases.
// See the 'acasei010' regression test.
continue;
}
queue.push_back(next);
seen.insert(next.id);
// Find the failure state for next. Same as standard.
let mut fail = it.nfa().state(item.id).fail;
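A small usage sketch of the behavior this hunk protects (the 'acasei010' case itself lives in the crate's test suite; the pattern and haystack below are illustrative stand-ins):

```rust
use aho_corasick::AhoCorasickBuilder;

fn main() {
    // ASCII case insensitivity can make two byte transitions point at the
    // same state; skipping already-seen states keeps each match reported
    // exactly once.
    let ac = AhoCorasickBuilder::new()
        .ascii_case_insensitive(true)
        .build(&["abc"]);
    let starts: Vec<usize> = ac.find_iter("aBc abc").map(|m| m.start()).collect();
    assert_eq!(starts, vec![0, 4]);
}
```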

View File

@@ -80,6 +80,17 @@ pub trait Prefilter:
fn reports_false_positives(&self) -> bool {
true
}
/// Returns true if and only if this prefilter may look for a non-starting
/// position of a match.
///
/// This is useful in a streaming context, where prefilters that look for
/// something other than the starting position of a match can be quite
/// difficult to deal with.
///
/// This returns false by default.
fn looks_for_non_start_of_match(&self) -> bool {
false
}
}
impl<'a, P: Prefilter + ?Sized> Prefilter for &'a P {
@ -191,6 +202,17 @@ impl PrefilterState {
}
}
/// Create a prefilter state that always disables the prefilter.
pub fn disabled() -> PrefilterState {
PrefilterState {
skips: 0,
skipped: 0,
max_match_len: 0,
inert: true,
last_scan_at: 0,
}
}
/// Update this state with the number of bytes skipped on the last
/// invocation of the prefilter.
#[inline]
@ -285,6 +307,7 @@ impl Builder {
/// All patterns added to an Aho-Corasick automaton should be added to this
/// builder before attempting to construct the prefilter.
pub fn build(&self) -> Option<PrefilterObj> {
match (self.start_bytes.build(), self.rare_bytes.build()) {
// If we could build both start and rare prefilters, then there are
// a few cases in which we'd want to use the start-byte prefilter
@ -663,6 +686,33 @@ impl Prefilter for RareBytesOne {
fn heap_bytes(&self) -> usize {
0
}
fn looks_for_non_start_of_match(&self) -> bool {
// TODO: It should be possible to use a rare byte prefilter in a
// streaming context. The main problem is that we usually assume that
// if a prefilter has scanned some text and not found anything, then no
// match *starts* in that text. This doesn't matter in non-streaming
// contexts, but in a streaming context, if we're looking for a byte
// that doesn't start at the beginning of a match and don't find it,
// then it's still possible for a match to start at the end of the
// current buffer content. In order to fix this, the streaming searcher
// would need to become aware of prefilters that do this and use the
// appropriate offset in various places. It is quite a delicate change
// and probably shouldn't be attempted until streaming search has a
// better testing strategy. In particular, we'd really like to be able
// to vary the buffer size to force strange cases that occur at the
// edge of the buffer. If we make the buffer size minimal, then these
// cases occur more frequently and are easier to reproduce.
//
// This is also a bummer because this means that if the prefilter
// builder chose a rare byte prefilter, then a streaming search won't
// use any prefilter at all because the builder doesn't know how it's
// going to be used. Assuming we don't make streaming search aware of
// these special types of prefilters as described above, we could fix
// this by building a "backup" prefilter that could be used when the
// rare byte prefilter could not. But that's a band-aid. Sigh.
true
}
}
/// A prefilter for scanning for two "rare" bytes.
@ -697,6 +747,11 @@ impl Prefilter for RareBytesTwo {
fn heap_bytes(&self) -> usize {
0
}
fn looks_for_non_start_of_match(&self) -> bool {
// TODO: See Prefilter impl for RareBytesOne.
true
}
}
/// A prefilter for scanning for three "rare" bytes.
@ -732,6 +787,11 @@ impl Prefilter for RareBytesThree {
fn heap_bytes(&self) -> usize {
0
}
fn looks_for_non_start_of_match(&self) -> bool {
// TODO: See Prefilter impl for RareBytesOne.
true
}
}
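To make the intent concrete, here is an illustrative, editor-added sketch of how a streaming searcher might consume this hook; `Prefilter` and `PrefilterState` are the crate-internal types above, and the helper itself does not exist in the crate.

// Hypothetical helper: pick a prefilter state that is safe for streaming.
fn stream_prefilter_state(
    pre: &dyn Prefilter,
    max_match_len: usize,
) -> PrefilterState {
    if pre.looks_for_non_start_of_match() {
        // A rare-byte scan may skip past the true start of a match that
        // straddles the buffer edge, so disable prefiltering entirely.
        PrefilterState::disabled()
    } else {
        PrefilterState::new(max_match_len)
    }
}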
/// A builder for constructing a starting byte prefilter.

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
{"files":{"CHANGELOG.md":"4d03cdc2987a1fa1b86a2de5fa57714d83cbb9d3d3f400eadecd8e8a0a857621","Cargo.toml":"a9ec8b36707f907971b410719b85e9594cb96c9e4bca6f831e2cc78ba22c71da","LICENSE":"0dd39f89842df915b8ded7ac59e8a1372cf5be36133818866cca2ef3af1a2849","README.md":"132383b73044b1e91acb9e5d69afeb8f14239cfe712aca59152bfe0c420f7a33","examples/test.rs":"4e9e73dfe80573296e93f66c2c03681908c278a758dceb4913ecb65d20e9ed86","src/lib.rs":"7a0e852a4bbfbf72c7702527d7c6f7f8c717fca77bfd4b3e78ba7f6cebed4e6f","src/line.rs":"edbdc54503342733f8aa7a4aa72a7cb08d376d53ca2b85e00a77dd42bf04bb22","src/shapes/mod.rs":"071d6ea4080dc8f1e4299258d65c32bccc40e9eb6933f3b3600576d58e7917ae","src/shapes/rectangle.rs":"ad545b9d4a628b3a515deb9b087f881b253d3f3a16a60734da82896d51c93cc9","src/text/fontconfig.rs":"c673bfcf5df387479dd2027a733d8de85461731b448202f49a9f2d1bce54f465","src/text/mod.rs":"4afd25c6297d55cd5a3956e5ae6d3921403b306533a237fe2e5eab33e65a91ee"},"package":"9b7f09f89872c2b6b29e319377b1fbe91c6f5947df19a25596e121cf19a7b35e"}

View File

@ -1,61 +0,0 @@
"""
@generated
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
# buildifier: disable=load
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_binary",
"rust_library",
"rust_test",
)
# buildifier: disable=load
load("@bazel_skylib//lib:selects.bzl", "selects")
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # MIT from expression "MIT"
])
# Generated Targets
# Unsupported target "test" with type "example" omitted
rust_library(
name = "andrew",
srcs = glob(["**/*.rs"]),
crate_features = [
],
crate_root = "src/lib.rs",
crate_type = "lib",
data = [],
edition = "2015",
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"manual",
],
version = "0.2.1",
# buildifier: leave-alone
deps = [
"//third_party/cargo/vendor/bitflags-1.2.1:bitflags",
"//third_party/cargo/vendor/line_drawing-0.7.0:line_drawing",
"//third_party/cargo/vendor/walkdir-2.3.1:walkdir",
"//third_party/cargo/vendor/xdg-2.2.0:xdg",
"//third_party/cargo/vendor/xml-rs-0.8.3:xml_rs",
"//third_party/cargo/vendor/rusttype-0.8.3:rusttype",
],
)

View File

@ -1,47 +0,0 @@
# Change Log
## Unreleased
## 0.2.1 -- 2019-03-29
- Fix `get_width()` for texts that start and end with spaces
## 0.2.0 -- 2019-01-26
- **[Breaking]** Canvas is now endian-aware and will draw to the buffer in the endianness of the `Endian` it's created with
## 0.1.6 -- 2019-01-24
- Faster drawing of horizontal and vertical lines by precomputing line boundaries
- Only calculate alpha overlay when drawing colors without a non-max alpha value for performance
## 0.1.5 -- 2019-01-13
- Fix drawing of characters with negative bounding boxes
- Fix error in `get_width()` for text without any characters
## 0.1.4 -- 2018-11-10
- Remove rusttype version restriction
## 0.1.3 -- 2018-10-09
- Move from `quick-xml` to `xml-rs` dependency
## 0.1.2 -- 2018-10-04
- Add basic/experimental support for fontconfig in `andrew::text::fontconfig`
## 0.1.1 -- 2018-09-17
- Manage dependencies to maintain rust 1.22 compatibility
- Update rusttype to 0.7.1
## 0.1.0 -- 2018-08-17
Initial version, including:
- canvas
- lines
- rectangles
- text

View File

@ -1,41 +0,0 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g. crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "andrew"
version = "0.2.1"
authors = ["Lucas Timmins <timmins.s.lucas@gmail.com>"]
description = "The andrew crate provides convenient drawing of objects such as shapes, lines and text to buffers"
readme = "README.md"
keywords = ["draw", "buffer", "shapes", "lines", "pixels"]
categories = ["rendering", "multimedia::images"]
license = "MIT"
repository = "https://github.com/trimental/andrew"
[dependencies.bitflags]
version = "1.0.3"
[dependencies.line_drawing]
version = "0.7.0"
[dependencies.rusttype]
version = "0.7.1"
[dependencies.walkdir]
version = "2.0"
[dependencies.xdg]
version = "2.1.0"
[dependencies.xml-rs]
version = "0.8.0"
[dev-dependencies.smithay-client-toolkit]
version = "0.4.0"

View File

@ -1,3 +0,0 @@
# Andrew
This crate provides convenient drawing of objects such as shapes, lines and text to buffers

View File

@ -1,210 +0,0 @@
extern crate andrew;
extern crate smithay_client_toolkit as sctk;
use std::io::{Read, Seek, SeekFrom, Write};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use sctk::utils::{DoubleMemPool, MemPool};
use sctk::window::{ConceptFrame, Event as WEvent, Window};
use sctk::Environment;
use sctk::reexports::client::protocol::wl_compositor::RequestsTrait as CompositorRequests;
use sctk::reexports::client::protocol::wl_surface::RequestsTrait as SurfaceRequests;
use sctk::reexports::client::protocol::{wl_shm, wl_surface};
use sctk::reexports::client::{Display, Proxy};
use andrew::shapes::rectangle;
use andrew::text;
use andrew::text::fontconfig;
fn main() {
let (display, mut event_queue) =
Display::connect_to_env().expect("Failed to connect to the wayland server.");
let env = Environment::from_display(&*display, &mut event_queue).unwrap();
let seat = env
.manager
.instantiate_auto(|seat| seat.implement(|_, _| {}, ()))
.unwrap();
let mut dimensions = (600, 400);
let surface = env
.compositor
.create_surface(|surface| surface.implement(|_, _| {}, ()))
.unwrap();
let next_action = Arc::new(Mutex::new(None::<WEvent>));
let waction = next_action.clone();
let mut window = Window::<ConceptFrame>::init_from_env(&env, surface, dimensions, move |evt| {
let mut next_action = waction.lock().unwrap();
// Keep last event in priority order : Close > Configure > Refresh
let replace = match (&evt, &*next_action) {
(_, &None)
| (_, &Some(WEvent::Refresh))
| (&WEvent::Configure { .. }, &Some(WEvent::Configure { .. }))
| (&WEvent::Close, _) => true,
_ => false,
};
if replace {
*next_action = Some(evt);
}
})
.expect("Failed to create a window !");
window.new_seat(&seat);
let mut pools = DoubleMemPool::new(&env.shm, || {}).expect("Failed to create a memory pool !");
let mut font_data = Vec::new();
::std::fs::File::open(
&fontconfig::FontConfig::new()
.unwrap()
.get_regular_family_fonts("sans")
.unwrap()[0],
)
.unwrap()
.read_to_end(&mut font_data)
.unwrap();
if !env.shell.needs_configure() {
if let Some(pool) = pools.pool() {
redraw(pool, window.surface(), dimensions, &font_data);
}
window.refresh();
}
loop {
match next_action.lock().unwrap().take() {
Some(WEvent::Close) => break,
Some(WEvent::Refresh) => {
window.refresh();
window.surface().commit();
}
Some(WEvent::Configure { new_size, .. }) => {
if let Some((w, h)) = new_size {
window.resize(w, h);
dimensions = (w, h)
}
window.refresh();
if let Some(pool) = pools.pool() {
redraw(pool, window.surface(), dimensions, &font_data);
}
}
None => {}
}
display.flush().unwrap();
event_queue.dispatch().unwrap();
}
}
fn redraw(
pool: &mut MemPool,
surface: &Proxy<wl_surface::WlSurface>,
dimensions: (u32, u32),
font_data: &[u8],
) {
let (buf_x, buf_y) = (dimensions.0 as usize, dimensions.1 as usize);
pool.resize(4 * buf_x * buf_y)
.expect("Failed to resize the memory pool.");
let mut buf: Vec<u8> = vec![255; 4 * buf_x * buf_y];
let mut canvas =
andrew::Canvas::new(&mut buf, buf_x, buf_y, 4 * buf_x, andrew::Endian::native());
println!("______________");
let mut total_dur = Duration::new(0, 0);
// Draw background
let (block_w, block_h) = (buf_x / 20, buf_y / 20);
for block_y in 0..21 {
for block_x in 0..21 {
let color = if (block_x + (block_y % 2)) % 2 == 0 {
[255, 0, 0, 0]
} else {
[255, 255, 255, 255]
};
let block = rectangle::Rectangle::new(
(block_w * block_x, block_h * block_y),
(block_w, block_h),
None,
Some(color),
);
let timer = Instant::now();
canvas.draw(&block);
total_dur += timer.elapsed()
}
}
println!("Background draw time: {:?}", total_dur);
let rectangle = rectangle::Rectangle::new(
(buf_x / 30, buf_y / 4),
(buf_x - (buf_x / 30) * 2, buf_y - buf_y / 2),
Some((
15,
[255, 170, 20, 45],
rectangle::Sides::TOP ^ rectangle::Sides::BOTTOM,
Some(10),
)),
Some([255, 170, 20, 45]),
);
let mut timer = Instant::now();
canvas.draw(&rectangle);
println!("Rectangle draw time: {:?}", timer.elapsed());
total_dur += timer.elapsed();
let text_h = buf_x as f32 / 80.;
let text_hh = text_h / 2.;
let mut text = text::Text::new(
(63, 69),
[255, 255, 255, 255],
font_data,
text_h,
2.0,
"“Life is the art of drawing without an eraser.” - John W. Gardner",
);
text.pos = (
buf_x / 2 - text.get_width() / 2,
buf_y / 2 - text_hh as usize,
);
let text_box = rectangle::Rectangle::new(
(
buf_x / 2 - text.get_width() / 2 - 10,
buf_y / 2 - text_hh as usize - 10,
),
(text.get_width() + 20, text_h as usize + 20),
Some((3, [255, 255, 255, 255], rectangle::Sides::ALL, Some(5))),
None,
);
timer = Instant::now();
canvas.draw(&text_box);
println!("Text box draw time: {:?}", timer.elapsed());
total_dur += timer.elapsed();
timer = Instant::now();
canvas.draw(&text);
println!("Text draw time: {:?}", timer.elapsed());
total_dur += timer.elapsed();
println!("Total draw time: {:?}", total_dur);
pool.seek(SeekFrom::Start(0)).unwrap();
pool.write_all(canvas.buffer).unwrap();
pool.flush().unwrap();
let new_buffer = pool.buffer(
0,
buf_x as i32,
buf_y as i32,
4 * buf_x as i32,
wl_shm::Format::Argb8888,
);
surface.attach(Some(&new_buffer), 0, 0);
surface.commit();
}

View File

@ -1,134 +0,0 @@
//! Andrew is a crate for drawing objects
#![warn(missing_docs)]
extern crate line_drawing;
extern crate rusttype;
extern crate walkdir;
extern crate xdg;
extern crate xml;
#[macro_use]
extern crate bitflags;
/// A module that contains functions and objects relating to lines
pub mod line;
/// A module that contains functions and objects relating to shapes
pub mod shapes;
/// A module that contains functions and objects relating to text
pub mod text;
/// The Drawable trait allows objects to be drawn to a buffer or canvas
pub trait Drawable {
/// A function that draws the object to a canvas
fn draw(&self, canvas: &mut Canvas);
}
/// Describes an endianness (aka byte order)
#[derive(Debug, PartialEq)]
pub enum Endian {
/// Little Endian
Little,
/// Big Endian
Big,
}
impl Endian {
/// Returns the native endianness
pub fn native() -> Endian {
if cfg!(target_endian = "little") {
Endian::Little
} else {
Endian::Big
}
}
}
/// The canvas object acts as a wrapper around a buffer, providing information and functions
/// for drawing
pub struct Canvas<'a> {
/// A buffer for the canvas to draw to
pub buffer: &'a mut [u8],
/// The width in pixels of the canvas
pub width: usize,
/// The height in pixels of the canvas
pub height: usize,
/// The number of bytes between each line of pixels on the canvas
pub stride: usize,
/// The number of bytes contained in each pixel
pub pixel_size: usize,
/// The endianness of the canvas
pub endianness: Endian,
}
impl<'a> Canvas<'a> {
/// Creates a new canvas object
pub fn new(
buffer: &'a mut [u8],
width: usize,
height: usize,
stride: usize,
endianness: Endian,
) -> Canvas<'a> {
assert!(
stride % width == 0,
"Incorrect Dimensions - Stride is not a multiple of width"
);
assert!(buffer.len() == stride * height);
let pixel_size = stride / width;
Canvas {
buffer,
width,
height,
stride,
pixel_size,
endianness,
}
}
/// Draws an object that implements the Drawable trait to the buffer
pub fn draw<D: Drawable>(&mut self, drawable: &D) {
drawable.draw(self);
}
/// Draws a pixel at the x and y coordinate
pub fn draw_point(&mut self, x: usize, y: usize, color: [u8; 4]) {
let base = self.stride * y + self.pixel_size * x;
if self.endianness == Endian::Little {
if color[0] == 255 {
self.buffer[base + 3] = color[0];
self.buffer[base + 2] = color[1];
self.buffer[base + 1] = color[2];
self.buffer[base] = color[3];
} else {
for c in 0..3 {
let alpha = f32::from(color[0]) / 255.0;
let color_diff =
(color[3 - c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
self.buffer[base + c] = new_color as u8;
}
self.buffer[base + 3] = 255 as u8;
}
} else if color[0] == 255 {
self.buffer[base] = color[0];
self.buffer[base + 1] = color[1];
self.buffer[base + 2] = color[2];
self.buffer[base + 3] = color[3];
} else {
for c in 1..4 {
let alpha = f32::from(color[0]) / 255.0;
let color_diff =
(color[c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
self.buffer[base + c] = new_color as u8;
}
self.buffer[base] = 255 as u8;
}
}
/// Clears the entire canvas buffer by zeroing it
pub fn clear(&mut self) {
for i in 0..self.width * self.height * 4 {
self.buffer[i] = 0x00;
}
}
}

View File

@ -1,84 +0,0 @@
use std::cmp::min;
use line_drawing::Bresenham;
use line_drawing::XiaolinWu;
use Canvas;
use Drawable;
/// A drawable object that represents a line
pub struct Line {
/// The first point of the line
pub pt1: (usize, usize),
/// The second point of the line
pub pt2: (usize, usize),
/// The color of the line
pub color: [u8; 4],
/// Decides whether the line will be antialiased
pub antialiased: bool,
}
impl Line {
/// Creates a new Line object
pub fn new(
pt1: (usize, usize),
pt2: (usize, usize),
color: [u8; 4],
antialiased: bool,
) -> Line {
Line {
pt1,
pt2,
color,
antialiased,
}
}
}
impl Drawable for Line {
fn draw(&self, canvas: &mut Canvas) {
if !self.antialiased {
if self.pt1.0 == self.pt2.0 && self.pt1.0 < canvas.width {
let (min_y, max_y) = if self.pt1.1 > self.pt2.1 {
(self.pt2.1, self.pt1.1)
} else {
(self.pt1.1, self.pt2.1)
};
for y in min_y..min(max_y, canvas.height - 1) + 1 {
canvas.draw_point(self.pt1.0, y, self.color)
}
} else if self.pt1.1 == self.pt2.1 && self.pt1.1 < canvas.height {
let (min_x, max_x) = if self.pt1.0 > self.pt2.0 {
(self.pt2.0, self.pt1.0)
} else {
(self.pt1.0, self.pt2.0)
};
for x in min_x..min(max_x, canvas.width - 1) + 1 {
canvas.draw_point(x, self.pt1.1, self.color)
}
} else {
// Angled line without antialias
for (x, y) in Bresenham::new(
(self.pt1.0 as isize, self.pt1.1 as isize),
(self.pt2.0 as isize, self.pt2.1 as isize),
) {
if x < canvas.width as isize && y < canvas.height as isize {
canvas.draw_point(x as usize, y as usize, self.color)
}
}
}
} else {
// Angled line with antialias
for ((x, y), coverage) in XiaolinWu::<f32, isize>::new(
(self.pt1.0 as f32, self.pt1.1 as f32),
(self.pt2.0 as f32, self.pt2.1 as f32),
) {
if x < canvas.width as isize && y < canvas.height as isize {
let mut color = self.color;
color[3] = (f32::from(color[3]) * coverage) as u8;
canvas.draw_point(x as usize, y as usize, color)
}
}
}
}
}

View File

@ -1,153 +0,0 @@
use line::Line;
use Canvas;
use Drawable;
bitflags! {
/// The Sides bitflag presents the sides of a rectangle
pub struct Sides: u32 {
/// The top side of the rectangle
const TOP = 0b0001;
/// The bottom side of the rectangle
const BOTTOM = 0b0010;
/// The left side of the rectangle
const LEFT = 0b0100;
/// The right side of the rectangle
const RIGHT = 0b1000;
/// All sides of the rectangle
const ALL = Self::TOP.bits | Self::BOTTOM.bits | Self::LEFT.bits | Self::RIGHT.bits;
}
}
/// A drawable object that represents a rectangle
pub struct Rectangle {
/// Position of the top-left corner of rectangle
pub pos: (usize, usize),
/// The size of the rectangle to be drawn, the border will be contained within this size
pub size: (usize, usize),
/// The border that is drawn around the perimeter of the rectangle. Its arguments are
/// thickness of border, color of border, sides that the border is drawn around, rounding size
/// of the corners
pub border: Option<(usize, [u8; 4], Sides, Option<usize>)>,
/// The color of the fill (area) of the rectangle
pub fill: Option<[u8; 4]>,
}
impl Rectangle {
/// Creates a new Rectangle object
pub fn new(
pos: (usize, usize),
size: (usize, usize),
border: Option<(usize, [u8; 4], Sides, Option<usize>)>,
fill: Option<[u8; 4]>,
) -> Rectangle {
Rectangle {
pos,
size,
border,
fill,
}
}
fn draw_borders(&self, canvas: &mut Canvas) {
if let Some(border) = self.border {
for i in 0..border.0 {
let rounding_space = if let Some(round_size) = border.3 {
if i < round_size {
round_size
- ((round_size as f32).powi(2) - ((round_size - i - 1) as f32).powi(2))
.sqrt()
.round() as usize
} else {
0
}
} else {
0
};
// Top line
if border.2.contains(Sides::TOP) && canvas.width > rounding_space * 2 {
Line::new(
(self.pos.0 + rounding_space, self.pos.1 + i),
(self.pos.0 + self.size.0 - rounding_space, self.pos.1 + i),
border.1,
false,
)
.draw(canvas);
}
// Bottom line
if border.2.contains(Sides::BOTTOM) && canvas.width > rounding_space * 2 {
Line::new(
(self.pos.0 + rounding_space, self.pos.1 + self.size.1 - i),
(
self.pos.0 + self.size.0 - rounding_space,
self.pos.1 + self.size.1 - i,
),
border.1,
false,
)
.draw(canvas);
}
// Left line
if border.2.contains(Sides::LEFT) && canvas.height > rounding_space * 2 {
Line::new(
(self.pos.0 + i, self.pos.1 + rounding_space),
(self.pos.0 + i, self.pos.1 + self.size.1 - rounding_space),
border.1,
false,
)
.draw(canvas);
}
// Right line
if border.2.contains(Sides::RIGHT) && canvas.height > rounding_space * 2 {
Line::new(
(self.pos.0 + self.size.0 - i, self.pos.1 + rounding_space),
(
self.pos.0 + self.size.0 - i,
self.pos.1 + self.size.1 - rounding_space,
),
border.1,
false,
)
.draw(canvas);
}
}
}
}
fn draw_area(&self, canvas: &mut Canvas) {
if let Some(fill) = self.fill {
let (area_pos, area_size) = self.measure_area();
for y in area_pos.1..area_pos.1 + area_size.1 + 1 {
Line::new((area_pos.0, y), (area_pos.0 + area_size.0, y), fill, false).draw(canvas)
}
}
}
fn measure_area(&self) -> ((usize, usize), (usize, usize)) {
let (mut area_pos, mut area_size) = (self.pos, self.size);
if let Some(border) = self.border {
if border.2.contains(Sides::TOP) {
area_pos.1 += border.0;
area_size.1 -= border.0;
}
if border.2.contains(Sides::BOTTOM) {
area_size.1 -= border.0;
}
if border.2.contains(Sides::LEFT) {
area_pos.0 += border.0;
area_size.0 -= border.0;
}
if border.2.contains(Sides::RIGHT) {
area_size.0 -= border.0;
}
}
(area_pos, area_size)
}
}
impl Drawable for Rectangle {
fn draw(&self, canvas: &mut Canvas) {
self.draw_borders(canvas);
self.draw_area(canvas);
}
}

View File

@ -1,121 +0,0 @@
/// A module that contains functions and objects relating to fontconfig
pub mod fontconfig;
use rusttype::{point, Font, Scale, SharedBytes, VMetrics};
use std::fs::File;
use std::io::Read;
use std::path::PathBuf;
use Canvas;
use Drawable;
/// A drawable object that represents text
pub struct Text<'a> {
/// The position of the text on the canvas
pub pos: (usize, usize),
/// The color of the text
pub color: [u8; 4],
/// The text that is rendered to the canvas on draw
pub text: String,
/// The font used in rendering the text
pub font: Font<'a>,
/// The scale that is applied to the text
pub scale: Scale,
/// The vertical metrics of the text
pub v_metrics: VMetrics,
}
/// Loads a font file into a `Vec<u8>`
pub fn load_font_file<P: Into<PathBuf>>(path: P) -> Vec<u8> {
let mut data: Vec<u8> = Vec::new();
let mut file = File::open(path.into()).expect("Could not open font file");
file.read_to_end(&mut data)
.expect("Could not read font file");
data
}
impl<'a> Text<'a> {
/// Creates a new Text object
pub fn new<P: Into<SharedBytes<'a>>, T: Into<String>>(
pos: (usize, usize),
color: [u8; 4],
font_data: P,
height: f32,
width_scale: f32,
text: T,
) -> Text<'a> {
let text = text.into();
// Create font
let font = Font::from_bytes(font_data).expect("Error constructing Font");
// Create scale
let scale = Scale {
x: height * width_scale,
y: height,
};
// Create needed metrics
let v_metrics = font.v_metrics(scale);
Text {
pos,
color,
text: text.clone(),
scale,
v_metrics,
font,
}
}
fn draw_text(&self, canvas: &mut Canvas) {
let glyphs: Vec<_> = self
.font
.layout(&self.text, self.scale, point(0.0, self.v_metrics.ascent))
.collect();
for glyph in glyphs {
if let Some(bounding_box) = glyph.pixel_bounding_box() {
glyph.draw(|x, y, v| {
let x = ((x as usize + self.pos.0) as i32 + bounding_box.min.x) as usize;
let y = ((y as usize + self.pos.1) as i32 + bounding_box.min.y) as usize;
if x < canvas.width && y < canvas.height {
let mut color = self.color;
color[0] = (f32::from(color[0]) * v) as u8;
canvas.draw_point(x, y, color);
}
});
}
}
}
/// Calculates the width in pixels of the text
pub fn get_width(&self) -> usize {
let glyphs: Vec<_> = self
.font
.layout(&self.text, self.scale, point(0.0, self.v_metrics.ascent))
.collect();
let min_x = glyphs
.first()
.map(|g| {
if let Some(bb) = g.pixel_bounding_box() {
bb.min.x
} else {
g.position().x as i32
}
})
.unwrap_or(0);
let max_x = glyphs
.last()
.map(|g| {
if let Some(bb) = g.pixel_bounding_box() {
bb.max.x
} else {
(g.position().x + g.unpositioned().h_metrics().advance_width) as i32
}
})
.unwrap_or(0);
(max_x - min_x) as usize
}
}
impl<'a> Drawable for Text<'a> {
fn draw(&self, canvas: &mut Canvas) {
self.draw_text(canvas);
}
}

View File

@ -0,0 +1 @@
{"files":{"CHANGELOG.md":"2d031d89de7918f60e866fbdb1069a8e927112e817f384990dfe50873578e7d7","Cargo.lock":"faa3eeb18c2fab92248ac415c6e4c056e443779b58ba8bea86076c4132755e47","Cargo.toml":"aa8de0b7b23e0cb281a6bbcd1652964b70129a6a8f34f5313aa54264a25b8229","LICENSE":"0dd39f89842df915b8ded7ac59e8a1372cf5be36133818866cca2ef3af1a2849","README.md":"54274cb43d69009c72c8d28647ba28bc33f639bb0e81fb4fa8ace6ffc6c66bc2","doc_index.html":"284a4836b0eef54a3d8307e490d466fa9b1b749884fb88cf097e7cbbffb75590","examples/test.rs":"dafef175db24c64037a36397dccb0ba6879e6abb08befdb56d2fb5af47f7ffe1","src/lib.rs":"821bccf3441e41c4e55a6991d47a998c712e0a2f5ca6595752b005cbb453c3f3","src/line.rs":"cb3d3ea7938a74c169ce9c5b43111fe2793936324b499767affaf7e6c6340cf9","src/shapes/mod.rs":"071d6ea4080dc8f1e4299258d65c32bccc40e9eb6933f3b3600576d58e7917ae","src/shapes/rectangle.rs":"66b92e36d3e1df73facc5d8d37db962cb2a8d68a7e66ae6be0af9eab621ed917","src/text/fontconfig.rs":"c673bfcf5df387479dd2027a733d8de85461731b448202f49a9f2d1bce54f465","src/text/mod.rs":"4ce335cac3776b9a593989c30ffdc18b53bb14e2ab70d21eeacbb3bc710ac8cf"},"package":"8c4afb09dd642feec8408e33f92f3ffc4052946f6b20f32fb99c1f58cd4fa7cf"}

View File

@ -0,0 +1,60 @@
"""
@generated
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
# buildifier: disable=load
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_binary",
"rust_library",
"rust_test",
)
# buildifier: disable=load
load("@bazel_skylib//lib:selects.bzl", "selects")
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # MIT from expression "MIT"
])
# Generated Targets
# Unsupported target "test" with type "example" omitted
rust_library(
name = "andrew",
srcs = glob(["**/*.rs"]),
crate_features = [
],
crate_root = "src/lib.rs",
crate_type = "lib",
data = [],
edition = "2015",
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"manual",
],
version = "0.3.1",
# buildifier: leave-alone
deps = [
"//third_party/cargo/vendor/bitflags-1.2.1:bitflags",
"//third_party/cargo/vendor/rusttype-0.9.2:rusttype",
"//third_party/cargo/vendor/walkdir-2.3.1:walkdir",
"//third_party/cargo/vendor/xdg-2.2.0:xdg",
"//third_party/cargo/vendor/xml-rs-0.8.3:xml_rs",
],
)

View File

@ -0,0 +1,58 @@
# Change Log
## Unreleased
## 0.3.1 -- 2020-10-23
- Speed up rectangle drawing
- Remove dependency on line_drawing
- Update sctk dev dependency to 0.12
## 0.3.0 -- 2020-05-27
- Raised MSRV to `1.41.0`.
- Upgraded dependency versions.
## 0.2.1 -- 2019-03-29
- Fix `get_width()` for texts that start and end with spaces
## 0.2.0 -- 2019-01-26
- **[Breaking]** Canvas is now endian-aware and will draw to the buffer in the endianness of the `Endian` it's created with
## 0.1.6 -- 2019-01-24
- Faster drawing of horizontal and vertical lines by precomputing line boundaries
- Only calculate alpha overlay when drawing colors without a non-max alpha value for performance
## 0.1.5 -- 2019-01-13
- Fix drawing of characters with negative bounding boxes
- Fix error in `get_width()` for text without any characters
## 0.1.4 -- 2018-11-10
- Remove rusttype version restriction
## 0.1.3 -- 2018-10-09
- Move from `quick-xml` to `xml-rs` dependency
## 0.1.2 -- 2018-10-04
- Add basic/experimental support for fontconfig in `andrew::text::fontconfig`
## 0.1.1 -- 2018-09-17
- Manage dependencies to maintain rust 1.22 compatibility
- Update rusttype to 0.7.1
## 0.1.0 -- 2018-08-17
Initial version, including:
- canvas
- lines
- rectangles
- text

419
third_party/cargo/vendor/andrew-0.3.1/Cargo.lock generated vendored Normal file
View File

@ -0,0 +1,419 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "ab_glyph_rasterizer"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9fe5e32de01730eb1f6b7f5b51c17e03e2325bf40a74f754f04f130043affff"
[[package]]
name = "andrew"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e1ea80a5089cac999ffd4a91888154076a961d27387b0f7a6cd2d4dddb636b9"
dependencies = [
"bitflags",
"line_drawing",
"rusttype",
"walkdir",
"xdg",
"xml-rs",
]
[[package]]
name = "andrew"
version = "0.3.1"
dependencies = [
"bitflags",
"rusttype",
"smithay-client-toolkit",
"walkdir",
"xdg",
"xml-rs",
]
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "byteorder"
version = "1.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
[[package]]
name = "calloop"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b036167e76041694579972c28cf4877b4f92da222560ddb49008937b6a6727c"
dependencies = [
"log",
"nix",
]
[[package]]
name = "cc"
version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed67cbde08356238e75fc4656be4749481eeffb09e19f320a25237d5221c985d"
[[package]]
name = "cfg-if"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]]
name = "dlib"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b11f15d1e3268f140f68d390637d5e76d849782d971ae7063e0da69fe9709a76"
dependencies = [
"libloading",
]
[[package]]
name = "downcast-rs"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743"
[[package]]
name = "libloading"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3557c9384f7f757f6d139cd3a4c62ef4e850696c16bf27924a5538c8a09717a1"
dependencies = [
"cfg-if",
"winapi",
]
[[package]]
name = "line_drawing"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f81902e542483002b103c6424d23e765c2e5a65f732923299053a601bce50ab2"
dependencies = [
"num-traits 0.1.43",
]
[[package]]
name = "log"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
dependencies = [
"cfg-if",
]
[[package]]
name = "memchr"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
[[package]]
name = "memmap"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "nix"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83450fe6a6142ddd95fb064b746083fc4ef1705fe81f64a64e1d4b39f54a1055"
dependencies = [
"bitflags",
"cc",
"cfg-if",
"libc",
]
[[package]]
name = "nom"
version = "5.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
dependencies = [
"memchr",
"version_check",
]
[[package]]
name = "num-traits"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31"
dependencies = [
"num-traits 0.2.12",
]
[[package]]
name = "num-traits"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "260e51e7efe62b592207e9e13a68e43692a7a279171d6ba57abd208bf23645ad"
[[package]]
name = "owned_ttf_parser"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f923fb806c46266c02ab4a5b239735c144bdeda724a50ed058e5226f594cde3"
dependencies = [
"ttf-parser",
]
[[package]]
name = "pkg-config"
version = "0.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c"
[[package]]
name = "proc-macro2"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rusttype"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc7c727aded0be18c5b80c1640eae0ac8e396abf6fa8477d96cb37d18ee5ec59"
dependencies = [
"ab_glyph_rasterizer",
"owned_ttf_parser",
]
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "smallvec"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbee7696b84bbf3d89a1c2eccff0850e3047ed46bfcd2e92c29a2d074d57e252"
[[package]]
name = "smithay-client-toolkit"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ec5c077def8af49f9b5aeeb5fcf8079c638c6615c3a8f9305e2dea601de57f7"
dependencies = [
"andrew 0.3.0",
"bitflags",
"byteorder",
"calloop",
"dlib",
"lazy_static",
"log",
"memmap",
"nix",
"wayland-client",
"wayland-cursor",
"wayland-protocols",
]
[[package]]
name = "ttf-parser"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e5d7cd7ab3e47dda6e56542f4bbf3824c15234958c6e1bd6aaa347e93499fdc"
[[package]]
name = "unicode-xid"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
[[package]]
name = "version_check"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
[[package]]
name = "walkdir"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]]
name = "wayland-client"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80c54f9b90b2c044784f91fe22c5619a8a9c681db38492f2fd78ff968cf3f184"
dependencies = [
"bitflags",
"downcast-rs",
"libc",
"nix",
"wayland-commons",
"wayland-scanner",
"wayland-sys",
]
[[package]]
name = "wayland-commons"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7602d75560fe6f02cac723609cce658042fe60541b5107999818d29d4dab7cfa"
dependencies = [
"nix",
"once_cell",
"smallvec",
"wayland-sys",
]
[[package]]
name = "wayland-cursor"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0446b959c5b5b4b2c11f63112fc7cbeb50ecd9f2c340d2b0ea632875685baf04"
dependencies = [
"nix",
"wayland-client",
"xcursor",
]
[[package]]
name = "wayland-protocols"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d419585bbdb150fb541579cff205c6095a86cd874530e41838d1f18a9569a08"
dependencies = [
"bitflags",
"wayland-client",
"wayland-commons",
"wayland-scanner",
]
[[package]]
name = "wayland-scanner"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cc091af4b05a435312f7cefe3a26824d2017966a58362ca913f72c3d68e5e2"
dependencies = [
"proc-macro2",
"quote",
"xml-rs",
]
[[package]]
name = "wayland-sys"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5640f53d1fe6eaaa2e77b9ff015fe9a556173ce8388607f941aecfd9b05c73e"
dependencies = [
"pkg-config",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "xcursor"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3a481cfdefd35e1c50073ae33a8000d695c98039544659f5dc5dd71311b0d01"
dependencies = [
"nom",
]
[[package]]
name = "xdg"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d089681aa106a86fade1b0128fb5daf07d5867a509ab036d99988dec80429a57"
[[package]]
name = "xml-rs"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a"

View File

@ -0,0 +1,38 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "andrew"
version = "0.3.1"
authors = ["Lucas Timmins <timmins.s.lucas@gmail.com>"]
description = "The andrew crate provides convenient drawing of objects such as shapes, lines and text to buffers"
readme = "README.md"
keywords = ["draw", "buffer", "shapes", "lines", "pixels"]
categories = ["rendering", "multimedia::images"]
license = "MIT"
repository = "https://github.com/Smithay/andrew"
[dependencies.bitflags]
version = "1.2.1"
[dependencies.rusttype]
version = "0.9.2"
[dependencies.walkdir]
version = "2.3.1"
[dependencies.xdg]
version = "2.2.0"
[dependencies.xml-rs]
version = "0.8.3"
[dev-dependencies.smithay-client-toolkit]
version = "0.12"

View File

@ -0,0 +1,13 @@
[![crates.io](http://meritbadge.herokuapp.com/andrew)](https://crates.io/crates/andrew)
[![Build Status](https://travis-ci.org/Smithay/andrew.svg?branch=master)](https://travis-ci.org/Smithay/andrew)
[![Docs Status](https://docs.rs/andrew/badge.svg)](https://docs.rs/andrew)
# Andrew
This crate provides convenient drawing of objects such as shapes, lines and text to buffers
## Documentation
The documentation for the master branch is [available online](https://smithay.github.io/andrew/).
The documentation for the releases can be found on [docs.rs](https://docs.rs/andrew).

View File

@ -0,0 +1,6 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv=refresh content=0;url=andrew/index.html />
</head>
</html>

View File

@ -0,0 +1,206 @@
extern crate andrew;
extern crate smithay_client_toolkit as sctk;
use std::io::{Read, Seek, SeekFrom, Write};
use std::time::{Duration, Instant};
use sctk::reexports::client::protocol::{wl_seat::WlSeat, wl_shm, wl_surface};
use sctk::shm::{DoubleMemPool, MemPool};
use sctk::window::{ConceptFrame, Event as WEvent};
use andrew::shapes::rectangle;
use andrew::text;
use andrew::text::fontconfig;
sctk::default_environment!(TestExample, desktop);
fn main() {
let (env, display, mut event_queue) = sctk::new_default_environment!(TestExample, desktop)
.expect("Unable to connect to a Wayland compositor");
let _seat = env.manager.instantiate_range::<WlSeat>(1, 6).unwrap();
let mut dimensions = (600, 400);
let surface = env.create_surface().detach();
let mut next_action = None::<WEvent>;
let mut window = env
.create_window::<ConceptFrame, _>(surface, None, dimensions, move |evt, mut dispatch_data| {
let next_actn = dispatch_data.get::<Option<WEvent>>().unwrap();
// Keep last event in priority order : Close > Configure > Refresh
let replace = match (&evt, &*next_actn) {
(_, &None)
| (_, &Some(WEvent::Refresh))
| (&WEvent::Configure { .. }, &Some(WEvent::Configure { .. }))
| (&WEvent::Close, _) => true,
_ => false,
};
if replace {
*next_actn = Some(evt);
}
})
.expect("Failed to create a window !");
let mut pools = DoubleMemPool::new(
env.get_global().expect("Failed to get `WlShm` global."),
|_| {},
)
.expect("Failed to create a memory pool !");
let mut font_data = Vec::new();
::std::fs::File::open(
&fontconfig::FontConfig::new()
.unwrap()
.get_regular_family_fonts("sans")
.unwrap()[0],
)
.unwrap()
.read_to_end(&mut font_data)
.unwrap();
if !env
.get_shell()
.expect("Expected environment to contain a shell.")
.needs_configure()
{
if let Some(pool) = pools.pool() {
redraw(pool, window.surface(), dimensions, &font_data);
}
window.refresh();
}
loop {
match next_action.take() {
Some(WEvent::Close) => break,
Some(WEvent::Refresh) => {
window.refresh();
window.surface().commit();
}
Some(WEvent::Configure { new_size, .. }) => {
if let Some((w, h)) = new_size {
window.resize(w, h);
dimensions = (w, h)
}
window.refresh();
if let Some(pool) = pools.pool() {
redraw(pool, window.surface(), dimensions, &font_data);
}
}
None => {}
}
display.flush().unwrap();
event_queue
.dispatch(&mut next_action, |_, _, _| {})
.unwrap();
}
}
fn redraw(
pool: &mut MemPool,
surface: &wl_surface::WlSurface,
dimensions: (u32, u32),
font_data: &[u8],
) {
let (buf_x, buf_y) = (dimensions.0 as usize, dimensions.1 as usize);
pool.resize(4 * buf_x * buf_y)
.expect("Failed to resize the memory pool.");
let mut buf: Vec<u8> = vec![255; 4 * buf_x * buf_y];
let mut canvas =
andrew::Canvas::new(&mut buf, buf_x, buf_y, 4 * buf_x, andrew::Endian::native());
println!("______________");
let mut total_dur = Duration::new(0, 0);
// Draw background
let (block_w, block_h) = (buf_x / 20, buf_y / 20);
for block_y in 0..21 {
for block_x in 0..21 {
let color = if (block_x + (block_y % 2)) % 2 == 0 {
[255, 0, 0, 0]
} else {
[255, 255, 255, 255]
};
let block = rectangle::Rectangle::new(
(block_w * block_x, block_h * block_y),
(block_w, block_h),
None,
Some(color),
);
let timer = Instant::now();
canvas.draw(&block);
total_dur += timer.elapsed()
}
}
println!("Background draw time: {:?}", total_dur);
let rectangle = rectangle::Rectangle::new(
(buf_x / 30, buf_y / 4),
(buf_x - (buf_x / 30) * 2, buf_y - buf_y / 2),
Some((
15,
[255, 170, 20, 45],
rectangle::Sides::TOP ^ rectangle::Sides::BOTTOM,
Some(10),
)),
Some([255, 170, 20, 45]),
);
let mut timer = Instant::now();
canvas.draw(&rectangle);
println!("Rectangle draw time: {:?}", timer.elapsed());
total_dur += timer.elapsed();
let text_h = buf_x as f32 / 80.;
let text_hh = text_h / 2.;
let mut text = text::Text::new(
(63, 69),
[255, 255, 255, 255],
font_data,
text_h,
2.0,
"“Life is the art of drawing without an eraser.” - John W. Gardner",
);
text.pos = (
buf_x / 2 - text.get_width() / 2,
buf_y / 2 - text_hh as usize,
);
let text_box = rectangle::Rectangle::new(
(
buf_x / 2 - text.get_width() / 2 - 10,
buf_y / 2 - text_hh as usize - 10,
),
(text.get_width() + 20, text_h as usize + 20),
Some((3, [255, 255, 255, 255], rectangle::Sides::ALL, Some(5))),
None,
);
timer = Instant::now();
canvas.draw(&text_box);
println!("Text box draw time: {:?}", timer.elapsed());
total_dur += timer.elapsed();
timer = Instant::now();
canvas.draw(&text);
println!("Text draw time: {:?}", timer.elapsed());
total_dur += timer.elapsed();
println!("Total draw time: {:?}", total_dur);
pool.seek(SeekFrom::Start(0)).unwrap();
pool.write_all(canvas.buffer).unwrap();
pool.flush().unwrap();
let new_buffer = pool.buffer(
0,
buf_x as i32,
buf_y as i32,
4 * buf_x as i32,
wl_shm::Format::Argb8888,
);
surface.attach(Some(&new_buffer), 0, 0);
surface.commit();
}

View File

@ -0,0 +1,133 @@
//! Andrew is a crate for drawing objects
#![warn(missing_docs)]
extern crate rusttype;
extern crate walkdir;
extern crate xdg;
extern crate xml;
#[macro_use]
extern crate bitflags;
/// A module that contains functions and objects relating to lines
pub mod line;
/// A module that contains functions and objects relating to shapes
pub mod shapes;
/// A module that contains functions and objects relating to text
pub mod text;
/// The Drawable trait allows objects to be drawn to a buffer or canvas
pub trait Drawable {
/// A function that draws the object to a canvas
fn draw(&self, canvas: &mut Canvas);
}
/// Describes an endianness (aka byte order)
#[derive(Debug, PartialEq)]
pub enum Endian {
/// Little Endian
Little,
/// Big Endian
Big,
}
impl Endian {
/// Returns the native endianness
pub fn native() -> Endian {
if cfg!(target_endian = "little") {
Endian::Little
} else {
Endian::Big
}
}
}
/// The canvas object acts as a wrapper around a buffer, providing information and functions
/// for drawing
pub struct Canvas<'a> {
/// A buffer for the canvas to draw to
pub buffer: &'a mut [u8],
/// The width in pixels of the canvas
pub width: usize,
/// The height in pixels of the canvas
pub height: usize,
/// The number of bytes between each line of pixels on the canvas
pub stride: usize,
/// The number of bytes contained in each pixel
pub pixel_size: usize,
/// The endianness of the canvas
pub endianness: Endian,
}
impl<'a> Canvas<'a> {
/// Creates a new canvas object
pub fn new(
buffer: &'a mut [u8],
width: usize,
height: usize,
stride: usize,
endianness: Endian,
) -> Canvas<'a> {
assert!(
stride % width == 0,
"Incorrect Dimensions - Stride is not a multiple of width"
);
assert!(buffer.len() == stride * height);
let pixel_size = stride / width;
Canvas {
buffer,
width,
height,
stride,
pixel_size,
endianness,
}
}
/// Draws an object that implements the Drawable trait to the buffer
pub fn draw<D: Drawable>(&mut self, drawable: &D) {
drawable.draw(self);
}
/// Draws a pixel at the x and y coordinate
pub fn draw_point(&mut self, x: usize, y: usize, color: [u8; 4]) {
let base = self.stride * y + self.pixel_size * x;
if self.endianness == Endian::Little {
if color[0] == 255 {
self.buffer[base + 3] = color[0];
self.buffer[base + 2] = color[1];
self.buffer[base + 1] = color[2];
self.buffer[base] = color[3];
} else {
for c in 0..3 {
let alpha = f32::from(color[0]) / 255.0;
let color_diff =
(color[3 - c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
self.buffer[base + c] = new_color as u8;
}
self.buffer[base + 3] = 255 as u8;
}
} else if color[0] == 255 {
self.buffer[base] = color[0];
self.buffer[base + 1] = color[1];
self.buffer[base + 2] = color[2];
self.buffer[base + 3] = color[3];
} else {
for c in 1..4 {
let alpha = f32::from(color[0]) / 255.0;
let color_diff =
(color[c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
self.buffer[base + c] = new_color as u8;
}
self.buffer[base] = 255 as u8;
}
}
/// Clears the entire canvas buffer by zeroing it
pub fn clear(&mut self) {
for i in 0..self.width * self.height * 4 {
self.buffer[i] = 0x00;
}
}
}
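As a usage illustration (editorial, not part of the vendored file), matching the 4-bytes-per-pixel ARGB layout the crate's own example uses: a fully opaque `draw_point` takes the fast path, while a partial alpha value blends channel by channel with the existing buffer contents.

fn main() {
    let (w, h) = (4usize, 4usize);
    let mut buf = vec![0u8; w * h * 4];
    let mut canvas =
        andrew::Canvas::new(&mut buf, w, h, w * 4, andrew::Endian::native());
    // Color is [alpha, r, g, b]; alpha == 255 writes the pixel directly.
    canvas.draw_point(1, 2, [255, 255, 255, 255]);
    let base = 2 * canvas.stride + canvas.pixel_size;
    assert_eq!(&canvas.buffer[base..base + 4], &[255, 255, 255, 255]);
}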

View File

@ -0,0 +1,167 @@
use std::cmp::{max, min};
use Canvas;
use Drawable;
use Endian;
/// A drawable object that represents a line
pub struct Line {
/// The first point of the line
pub pt1: (usize, usize),
/// The second point of the line
pub pt2: (usize, usize),
/// The color of the line
pub color: [u8; 4],
/// Decides whether the line will be antialiased
pub antialiased: bool,
}
impl Line {
/// Creates a new Line object
pub fn new(
pt1: (usize, usize),
pt2: (usize, usize),
color: [u8; 4],
antialiased: bool,
) -> Line {
Line {
pt1,
pt2,
color,
antialiased,
}
}
}
impl Drawable for Line {
fn draw(&self, canvas: &mut Canvas) {
if !self.antialiased {
if self.pt1.0 == self.pt2.0 && self.pt1.0 < canvas.width {
let min_y = min(self.pt1.1, self.pt2.1);
let max_y = min(max(self.pt1.1, self.pt2.1), canvas.height - 1);
for y in min_y..=max_y {
canvas.draw_point(self.pt1.0, y, self.color)
}
} else if self.pt1.1 == self.pt2.1 && self.pt1.1 < canvas.height {
let min_x = min(self.pt1.0, self.pt2.0);
let max_x = min(max(self.pt1.0, self.pt2.0), canvas.width - 1);
for x in min_x..=max_x {
canvas.draw_point(x, self.pt1.1, self.color)
}
} else {
// Angled line without antialias
for (x, y) in bresenham(
self.pt1.0 as isize,
self.pt1.1 as isize,
self.pt2.0 as isize,
self.pt2.1 as isize,
) {
if x < canvas.width && y < canvas.height {
canvas.draw_point(x, y, self.color)
}
}
}
} else {
// Angled line with antialias
for (x, y, coverage) in xiaolin_wu(
self.pt1.0 as f32,
self.pt1.1 as f32,
self.pt2.0 as f32,
self.pt2.1 as f32,
) {
if x < canvas.width && y < canvas.height {
let mut color = self.color;
let base = canvas.stride * y + canvas.pixel_size * x;
if coverage != 1.0 {
if canvas.endianness == Endian::Little {
color[1] = (canvas.buffer[base + 2] as f32 * (1.0 - coverage)
+ color[1] as f32 * coverage)
as u8;
color[2] = (canvas.buffer[base + 1] as f32 * (1.0 - coverage)
+ color[2] as f32 * coverage)
as u8;
color[3] = (canvas.buffer[base] as f32 * (1.0 - coverage)
+ color[3] as f32 * coverage)
as u8;
} else {
color[1] = (canvas.buffer[base + 1] as f32 * (1.0 - coverage)
+ color[1] as f32 * coverage)
as u8;
color[2] = (canvas.buffer[base + 2] as f32 * (1.0 - coverage)
+ color[2] as f32 * coverage)
as u8;
color[3] = (canvas.buffer[base + 3] as f32 * (1.0 - coverage)
+ color[3] as f32 * coverage)
as u8;
}
}
canvas.draw_point(x as usize, y as usize, color)
}
}
}
}
}
fn bresenham(mut x0: isize, mut y0: isize, x1: isize, y1: isize) -> Vec<(usize, usize)> {
let mut points: Vec<(usize, usize)> = Vec::new();
let dx = (x1 - x0).abs();
let sx = if x0 < x1 { 1 } else { -1 };
let dy = -((y1 - y0).abs());
let sy = if y0 < y1 { 1 } else { -1 };
let mut err = dx + dy;
loop {
points.push((x0 as usize, y0 as usize));
if x0 == x1 && y0 == y1 {
break;
};
let e2 = 2 * err;
if e2 >= dy {
err += dy;
x0 += sx;
}
if e2 <= dx {
err += dx;
y0 += sy;
}
}
points
}
fn xiaolin_wu(mut x0: f32, mut y0: f32, mut x1: f32, mut y1: f32) -> Vec<(usize, usize, f32)> {
let mut points: Vec<(usize, usize, f32)> = Vec::new();
let steep = (y1 - y0).abs() > (x1 - x0).abs();
if steep {
std::mem::swap(&mut x0, &mut y0);
std::mem::swap(&mut x1, &mut y1);
}
if x0 > x1 {
std::mem::swap(&mut x0, &mut x1);
std::mem::swap(&mut y0, &mut y1);
}
let dx = x1 - x0;
let dy = y1 - y0;
let gradient = if dx == 0.0 {
1.0
} else {
dy as f32 / dx as f32
};
let mut intery = y0 + gradient;
// Endpoints get full coverage; un-swap the coordinates for steep lines so
// callers always receive (x, y) in canvas space.
if steep {
points.push((y0 as usize, x0 as usize, 1.0));
points.push((y1 as usize, x1 as usize, 1.0));
} else {
points.push((x0 as usize, y0 as usize, 1.0));
points.push((x1 as usize, y1 as usize, 1.0));
}
if steep {
for x in x0 as usize + 1..=x1 as usize - 1 {
points.push((intery as usize, x, 1.0 - intery.fract()));
points.push((intery as usize + 1, x, intery.fract()));
intery = intery + gradient;
}
} else {
for x in x0 as usize + 1..=x1 as usize - 1 {
points.push((x, intery as usize, 1.0 - intery.fract()));
points.push((x, intery as usize + 1, intery.fract()));
intery = intery + gradient;
}
}
points
}
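An editor-added sketch of how these two helpers are reached through the public API: a diagonal, non-antialiased call lands in bresenham(), while an antialiased one lands in xiaolin_wu(). The color array is [alpha, r, g, b], as in Canvas::draw_point.

use andrew::{line::Line, Canvas, Endian};

fn main() {
    let (w, h) = (8usize, 8usize);
    let mut buf = vec![0u8; w * h * 4];
    let mut canvas = Canvas::new(&mut buf, w, h, w * 4, Endian::native());
    // Non-antialiased diagonal: integer Bresenham path.
    let hard = Line::new((0, 0), (7, 7), [255, 255, 0, 0], false);
    canvas.draw(&hard);
    // Antialiased diagonal: Xiaolin Wu path with coverage blending.
    let soft = Line::new((0, 7), (7, 0), [255, 0, 255, 0], true);
    canvas.draw(&soft);
}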

Some files were not shown because too many files have changed in this diff.