vendor all the shit.
parent
5565a77ac4
commit
ac8a8b0f5a
|
@ -0,0 +1 @@
|
|||
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"a2f9a1263aa35a92af4ffc1935b264f062738bc25761aa62b3d582031d6bf5f0","DESIGN.md":"44d4516ef38d60e9638f756baf40bcd9eff1b8e8ce7538a1d8549e02d6605d48","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"626d74e4bdac78d2446c75c722a7e46d0eaa4e506a1068ff693b5abc338a384f","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"46c57a83a75a8f25fdf19a15deae10748d12b8af9445ae74700a546a92024608","src/automaton.rs":"85e79ceb964f824fcceca026abd255980840116704834d70a1b9c44833df299f","src/buffer.rs":"c40992e7d1ba0bac6d1c268d41069aad81f2226686c64192ed888a60f66db8cd","src/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/classes.rs":"590f2e257bf7c630bea3a28d4a1f75c78db7a0802f5921aced017a056146b4e6","src/dfa.rs":"2fb1077edfefd2b7f7e9c0d9df55df1441d4571500a2c45aa5b41960a36441e4","src/error.rs":"36dbf2cefbfaa8a69186551320dbff023d3e82780a6c925e87c3e3997b967e66","src/lib.rs":"028ab998e8f0d1a98650b139bcca83681cbb52545060b9253b76d7e19117b53d","src/nfa.rs":"6bc3479ad37c576bba4bbdc9e3d0c6e69a4b7f0d9a88fcbbf727bf4a9b288494","src/packed/api.rs":"aa89627c7114c057c98ad1c7ab9ce18c6ed55267a6bcf7bc8efb917b6cfe5532","src/packed/mod.rs":"29c76ad3cbb1f831140cefac7a27fb504ac4af4f454975a571965b48aad417eb","src/packed/pattern.rs":"b88c57af057997da0a5a06f4c5604a7e598c20acfc11c15cd8977727f6e1cf9c","src/packed/rabinkarp.rs":"b3242a8631ea5607163dcbb641e4ac9c6da26774378da1e51651b0ab5656b390","src/packed/teddy/README.md":"5819f40d221af93288e705eadef5393a41d7a0900881b4d676e01fd65d5adf15","src/packed/teddy/compile.rs":"21b18cbee9bc33918b85b1dc51a0faed57acb426f61e6b72aeaf69faa7595701","src/packed/teddy/mod.rs":"f63db3419b1d378929bf0bc1f0e3b909ff3c38b9f2b6e86ba4546b8f39907cd3","src/packed/teddy/runtime.rs":"0a
1250ea73159b3be6e0fa9a3f55ecedbb2cb90cb798d1709e9f5ee48f8855d5","src/packed/tests.rs":"0b52ab9eef73a1a4f141f475a9fa98e54d447104aa69acba3a7f8248ce7164b2","src/packed/vector.rs":"ab3c0535fca5f09198d58cbfae44c292aeb3ce44bc92bca36d30dc72963639fc","src/prefilter.rs":"3dbe93d85c6fb985a9aea0b5eab003fe81a228e02adba00c8f63a35c3fd246b8","src/state_id.rs":"ebecd7046760e6bd72303f288be93342b446e7fe95f20b5ce23653d802c48b09","src/tests.rs":"9201cc0662bc9a1e8fa15c59e33a18a55ec6b3bd6bbea294d9cace0053bb8d24"},"package":"58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"}
|
|
@ -0,0 +1,47 @@
|
|||
"""
|
||||
cargo-raze crate build file.
|
||||
|
||||
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||
"""
|
||||
package(default_visibility = [
|
||||
# Public for visibility by "@raze__crate__version//" targets.
|
||||
#
|
||||
# Prefer access through "//third_party/cargo", which limits external
|
||||
# visibility to explicit Cargo.toml dependencies.
|
||||
"//visibility:public",
|
||||
])
|
||||
|
||||
licenses([
|
||||
"notice", # "MIT"
|
||||
"unencumbered", # "Unlicense"
|
||||
])
|
||||
|
||||
load(
|
||||
"@io_bazel_rules_rust//rust:rust.bzl",
|
||||
"rust_library",
|
||||
"rust_binary",
|
||||
"rust_test",
|
||||
)
|
||||
|
||||
|
||||
|
||||
rust_library(
|
||||
name = "aho_corasick",
|
||||
crate_root = "src/lib.rs",
|
||||
crate_type = "lib",
|
||||
edition = "2015",
|
||||
srcs = glob(["**/*.rs"]),
|
||||
deps = [
|
||||
"//third_party/cargo/vendor/memchr-2.3.0:memchr",
|
||||
],
|
||||
rustc_flags = [
|
||||
"--cap-lints=allow",
|
||||
],
|
||||
version = "0.7.6",
|
||||
crate_features = [
|
||||
"default",
|
||||
"memchr",
|
||||
"std",
|
||||
],
|
||||
)
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
This project is dual-licensed under the Unlicense and MIT licenses.
|
||||
|
||||
You may use this code under the terms of either license.
|
|
@ -0,0 +1,47 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.6"
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
exclude = ["/aho-corasick-debug", "/ci/*", "/.travis.yml", "/appveyor.yml"]
|
||||
autotests = false
|
||||
description = "Fast multiple substring searching."
|
||||
homepage = "https://github.com/BurntSushi/aho-corasick"
|
||||
readme = "README.md"
|
||||
keywords = ["string", "search", "text", "aho", "multi"]
|
||||
categories = ["text-processing"]
|
||||
license = "Unlicense/MIT"
|
||||
repository = "https://github.com/BurntSushi/aho-corasick"
|
||||
[profile.bench]
|
||||
debug = true
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
||||
[lib]
|
||||
name = "aho_corasick"
|
||||
[dependencies.memchr]
|
||||
version = "2.2.0"
|
||||
default-features = false
|
||||
[dev-dependencies.doc-comment]
|
||||
version = "0.3.1"
|
||||
|
||||
[features]
|
||||
default = ["std"]
|
||||
std = ["memchr/use_std"]
|
||||
[badges.appveyor]
|
||||
repository = "BurntSushi/aho-corasick"
|
||||
|
||||
[badges.travis-ci]
|
||||
repository = "BurntSushi/aho-corasick"
|
|
@ -0,0 +1,483 @@
|
|||
This document describes the internal design of this crate, which is an object
|
||||
lesson in what happens when you take a fairly simple old algorithm like
|
||||
Aho-Corasick and make it fast and production ready.
|
||||
|
||||
The target audience of this crate is Rust programmers that have some
|
||||
familiarity with string searching, however, one does not need to know the
|
||||
Aho-Corasick algorithm in order to read this (it is explained below). One
|
||||
should, however, know what a trie is. (If you don't, go read its Wikipedia
|
||||
article.)
|
||||
|
||||
The center-piece of this crate is an implementation of Aho-Corasick. On its
|
||||
own, Aho-Corasick isn't that complicated. The complex pieces come from the
|
||||
different variants of Aho-Corasick implemented in this crate. Specifically,
|
||||
they are:
|
||||
|
||||
* Aho-Corasick as an NFA, using dense transitions near root with sparse
|
||||
transitions elsewhere.
|
||||
* Aho-Corasick as a DFA. (An NFA is slower to search, but cheaper to construct
|
||||
and uses less memory.)
|
||||
* A DFA with pre-multiplied state identifiers. This saves a multiplication
|
||||
instruction in the core search loop.
|
||||
* A DFA with equivalence classes of bytes as the alphabet, instead of the
|
||||
traditional 256-byte alphabet. This shrinks the size of the DFA in memory,
|
||||
but adds an extra lookup in the core search loop to map the input byte to
|
||||
an equivalent class.
|
||||
* The option to choose how state identifiers are represented, via one of
|
||||
u8, u16, u32, u64 or usize. This permits creating compact automatons when
|
||||
matching a small number of patterns.
|
||||
* Supporting "standard" match semantics, along with its overlapping variant,
|
||||
in addition to leftmost-first and leftmost-longest semantics. The "standard"
|
||||
semantics are typically what you see in a textbook description of
|
||||
Aho-Corasick. However, Aho-Corasick is also useful as an optimization in
|
||||
regex engines, which often use leftmost-first or leftmost-longest semantics.
|
||||
Thus, it is useful to implement those semantics here. The "standard" and
|
||||
"leftmost" search algorithms are subtly different, and also require slightly
|
||||
different construction algorithms.
|
||||
* Support for ASCII case insensitive matching.
|
||||
* Support for accelerating searches when the patterns all start with a small
|
||||
number of fixed bytes. Or alternatively, when the patterns all contain a
|
||||
small number of rare bytes. (Searching for these bytes uses SIMD vectorized
|
||||
code courtesy of `memchr`.)
|
||||
* Transparent support for alternative SIMD vectorized search routines for
|
||||
smaller number of literals, such as the Teddy algorithm. We called these
|
||||
"packed" search routines because they use SIMD. They can often be an order of
|
||||
magnitude faster than just Aho-Corasick, but don't scale as well.
|
||||
* Support for searching streams. This can reuse most of the underlying code,
|
||||
but does require careful buffering support.
|
||||
* Support for anchored searches, which permit efficient `is_prefix` checks for
|
||||
a large number of patterns.
|
||||
|
||||
When you combine all of this together along with trying to make everything as
|
||||
fast as possible, what you end up with is entirely too much code with too much
|
||||
`unsafe`. Alas, I was not smart enough to figure out how to reduce it. Instead,
|
||||
we will explain it.
|
||||
|
||||
|
||||
# Basics
|
||||
|
||||
The fundamental problem this crate is trying to solve is to determine the
|
||||
occurrences of possibly many patterns in a haystack. The naive way to solve
|
||||
this is to look for a match for each pattern at each position in the haystack:
|
||||
|
||||
for i in 0..haystack.len():
|
||||
for p in patterns.iter():
|
||||
if haystack[i..].starts_with(p.bytes()):
|
||||
return Match(p.id(), i, i + p.bytes().len())
|
||||
|
||||
Those four lines are effectively all this crate does. The problem with those
|
||||
four lines is that they are very slow, especially when you're searching for a
|
||||
large number of patterns.
|
||||
|
||||
While there are many different algorithms available to solve this, a popular
|
||||
one is Aho-Corasick. It's a common solution because it's not too hard to
|
||||
implement, scales quite well even when searching for thousands of patterns and
|
||||
is generally pretty fast. Aho-Corasick does well here because, regardless of
|
||||
the number of patterns you're searching for, it always visits each byte in the
|
||||
haystack exactly once. This means, generally speaking, adding more patterns to
|
||||
an Aho-Corasick automaton does not make it slower. (Strictly speaking, however,
|
||||
this is not true, since a larger automaton will make less effective use of the
|
||||
CPU's cache.)
|
||||
|
||||
Aho-Corasick can be succinctly described as a trie with state transitions
|
||||
between some of the nodes that efficiently instruct the search algorithm to
|
||||
try matching alternative keys in the automaton. The trick is that these state
|
||||
transitions are arranged such that each byte of input needs to be inspected
|
||||
only once. These state transitions are typically called "failure transitions,"
|
||||
because they instruct the searcher (the thing traversing the automaton while
|
||||
reading from the haystack) what to do when a byte in the haystack does not
|
||||
correspond to a valid transition in the current state of the trie.
|
||||
|
||||
More formally, a failure transition points to a state in the automaton that may
|
||||
lead to a match whose prefix is a proper suffix of the path traversed through
|
||||
the trie so far. (If no such proper suffix exists, then the failure transition
|
||||
points back to the start state of the trie, effectively restarting the search.)
|
||||
This is perhaps simpler to explain pictorally. For example, let's say we built
|
||||
an Aho-Corasick automaton with the following patterns: 'abcd' and 'cef'. The
|
||||
trie looks like this:
|
||||
|
||||
a - S1 - b - S2 - c - S3 - d - S4*
|
||||
/
|
||||
S0 - c - S5 - e - S6 - f - S7*
|
||||
|
||||
where states marked with a `*` are match states (meaning, the search algorithm
|
||||
should stop and report a match to the caller).
|
||||
|
||||
So given this trie, it should be somewhat straight-forward to see how it can
|
||||
be used to determine whether any particular haystack *starts* with either
|
||||
`abcd` or `cef`. It's easy to express this in code:
|
||||
|
||||
fn has_prefix(trie: &Trie, haystack: &[u8]) -> bool {
|
||||
let mut state_id = trie.start();
|
||||
// If the empty pattern is in trie, then state_id is a match state.
|
||||
if trie.is_match(state_id) {
|
||||
return true;
|
||||
}
|
||||
for (i, &b) in haystack.iter().enumerate() {
|
||||
state_id = match trie.next_state(state_id, b) {
|
||||
Some(id) => id,
|
||||
// If there was no transition for this state and byte, then we know
|
||||
// the haystack does not start with one of the patterns in our trie.
|
||||
None => return false,
|
||||
};
|
||||
if trie.is_match(state_id) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
And that's pretty much it. All we do is move through the trie starting with the
|
||||
bytes at the beginning of the haystack. If we find ourselves in a position
|
||||
where we can't move, or if we've looked through the entire haystack without
|
||||
seeing a match state, then we know the haystack does not start with any of the
|
||||
patterns in the trie.
|
||||
|
||||
The meat of the Aho-Corasick algorithm is in how we add failure transitions to
|
||||
our trie to keep searching efficient. Specifically, it permits us to not only
|
||||
check whether a haystack *starts* with any one of a number of patterns, but
|
||||
rather, whether the haystack contains any of a number of patterns *anywhere* in
|
||||
the haystack.
|
||||
|
||||
As mentioned before, failure transitions connect a proper suffix of the path
|
||||
traversed through the trie before, with a path that leads to a match that has a
|
||||
prefix corresponding to that proper suffix. So in our case, for patterns `abcd`
|
||||
and `cef`, with a haystack `abcef`, we want to transition to state `S5` (from
|
||||
the diagram above) from `S3` upon seeing that the byte following `c` is not
|
||||
`d`. Namely, the proper suffix in this example is `c`, which is a prefix of
|
||||
`cef`. So the modified diagram looks like this:
|
||||
|
||||
|
||||
a - S1 - b - S2 - c - S3 - d - S4*
|
||||
/ /
|
||||
/ ----------------
|
||||
/ /
|
||||
S0 - c - S5 - e - S6 - f - S7*
|
||||
|
||||
One thing that isn't shown in this diagram is that *all* states have a failure
|
||||
transition, but only `S3` has a *non-trivial* failure transition. That is, all
|
||||
other states have a failure transition back to the start state. So if our
|
||||
haystack was `abzabcd`, then the searcher would transition back to `S0` after
|
||||
seeing `z`, which effectively restarts the search. (Because there is no pattern
|
||||
in our trie that has a prefix of `bz` or `z`.)
|
||||
|
||||
The code for traversing this *automaton* or *finite state machine* (it is no
|
||||
longer just a trie) is not that much different from the `has_prefix` code
|
||||
above:
|
||||
|
||||
fn contains(fsm: &FiniteStateMachine, haystack: &[u8]) -> bool {
|
||||
let mut state_id = fsm.start();
|
||||
// If the empty pattern is in fsm, then state_id is a match state.
|
||||
if fsm.is_match(state_id) {
|
||||
return true;
|
||||
}
|
||||
for (i, &b) in haystack.iter().enumerate() {
|
||||
// While the diagram above doesn't show this, we may wind up needing
|
||||
// to follow multiple failure transitions before we land on a state
|
||||
// in which we can advance. Therefore, when searching for the next
|
||||
// state, we need to loop until we don't see a failure transition.
|
||||
//
|
||||
// This loop terminates because the start state has no empty
|
||||
// transitions. Every transition from the start state either points to
|
||||
// another state, or loops back to the start state.
|
||||
loop {
|
||||
match fsm.next_state(state_id, b) {
|
||||
Some(id) => {
|
||||
state_id = id;
|
||||
break;
|
||||
}
|
||||
// Unlike our code above, if there was no transition for this
|
||||
// state, then we don't quit. Instead, we look for this state's
|
||||
// failure transition and follow that instead.
|
||||
None => {
|
||||
state_id = fsm.next_fail_state(state_id);
|
||||
}
|
||||
};
|
||||
}
|
||||
if fsm.is_match(state_id) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
Other than the complication around traversing failure transitions, this code
|
||||
is still roughly "traverse the automaton with bytes from the haystack, and quit
|
||||
when a match is seen."
|
||||
|
||||
And that concludes our section on the basics. While we didn't go deep into
|
||||
how the automaton is built (see `src/nfa.rs`, which has detailed comments about
|
||||
that), the basic structure of Aho-Corasick should be reasonably clear.
|
||||
|
||||
|
||||
# NFAs and DFAs
|
||||
|
||||
There are generally two types of finite automata: non-deterministic finite
|
||||
automata (NFA) and deterministic finite automata (DFA). The difference between
|
||||
them is, principally, that an NFA can be in multiple states at once. This is
|
||||
typically accomplished by things called _epsilon_ transitions, where one could
|
||||
move to a new state without consuming any bytes from the input. (The other
|
||||
mechanism by which NFAs can be in more than one state is where the same byte in
|
||||
a particular state transitions to multiple distinct states.) In contrast, a DFA
|
||||
can only ever be in one state at a time. A DFA has no epsilon transitions, and
|
||||
for any given state, a byte transitions to at most one other state.
|
||||
|
||||
By this formulation, the Aho-Corasick automaton described in the previous
|
||||
section is an NFA. This is because failure transitions are, effectively,
|
||||
epsilon transitions. That is, whenever the automaton is in state `S`, it is
|
||||
actually in the set of states that are reachable by recursively following
|
||||
failure transitions from `S`. (This means that, for example, the start state
|
||||
is always active since the start state is reachable via failure transitions
|
||||
from any state in the automaton.)
|
||||
|
||||
NFAs have a lot of nice properties. They tend to be easier to construct, and
|
||||
also tend to use less memory. However, their primary downside is that they are
|
||||
typically slower to execute. For example, the code above showing how to search
|
||||
with an Aho-Corasick automaton needs to potentially iterate through many
|
||||
failure transitions for every byte of input. While this is a fairly small
|
||||
amount of overhead, this can add up, especially if the automaton has a lot of
|
||||
overlapping patterns with a lot of failure transitions.
|
||||
|
||||
A DFA's search code, by contrast, looks like this:
|
||||
|
||||
fn contains(dfa: &DFA, haystack: &[u8]) -> bool {
|
||||
let mut state_id = dfa.start();
|
||||
// If the empty pattern is in dfa, then state_id is a match state.
|
||||
if dfa.is_match(state_id) {
|
||||
return true;
|
||||
}
|
||||
for (i, &b) in haystack.iter().enumerate() {
|
||||
// An Aho-Corasick DFA *never* has a missing state that requires
|
||||
// failure transitions to be followed. One byte of input advances the
|
||||
// automaton by one state. Always.
|
||||
        state_id = dfa.next_state(state_id, b);
|
||||
        if dfa.is_match(state_id) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
The search logic here is much simpler than for the NFA, and this tends to
|
||||
translate into significant performance benefits as well, since there's a lot
|
||||
less work being done for each byte in the haystack. How is this accomplished?
|
||||
It's done by pre-following all failure transitions for all states for all bytes
|
||||
in the alphabet, and then building a single state transition table. Building
|
||||
this DFA can be much more costly than building the NFA, and use much more
|
||||
memory, but the better performance can be worth it.
|
||||
|
||||
Users of this crate can actually choose between using an NFA or a DFA. By
|
||||
default, an NFA is used, because it typically strikes the best balance between
|
||||
space usage and search performance. But the DFA option is available for cases
|
||||
where a little extra memory and upfront time building the automaton is okay.
|
||||
For example, the `AhoCorasick::auto_configure` and
|
||||
`AhoCorasickBuilder::auto_configure` methods will enable the DFA setting if
|
||||
there are a small number of patterns.
|
||||
|
||||
|
||||
# More DFA tricks
|
||||
|
||||
As described in the previous section, one of the downsides of using a DFA is
|
||||
that it uses more memory and can take longer to build. One small way of
|
||||
mitigating these concerns is to map the alphabet used by the automaton into a
|
||||
smaller space. Typically, the alphabet of a DFA has 256 elements in it: one
|
||||
element for each possible value that fits into a byte. However, in many cases,
|
||||
one does not need the full alphabet. For example, if all patterns in an
|
||||
Aho-Corasick automaton are ASCII letters, then this only uses up 52 distinct
|
||||
bytes. As far as the automaton is concerned, the rest of the 204 bytes are
|
||||
indistinguishable from one another: they will never discriminate between a
|
||||
match or a non-match. Therefore, in cases like that, the alphabet can be shrunk
|
||||
to just 53 elements. One for each ASCII letter, and then another to serve as a
|
||||
placeholder for every other unused byte.
|
||||
|
||||
In practice, this library doesn't quite compute the optimal set of equivalence
|
||||
classes, but it's close enough in most cases. The key idea is that this then
|
||||
allows the transition table for the DFA to be potentially much smaller. The
|
||||
downside of doing this, however, is that since the transition table is defined
|
||||
in terms of this smaller alphabet space, every byte in the haystack must be
|
||||
re-mapped to this smaller space. This requires an additional 256-byte table.
|
||||
In practice, this can lead to a small search time hit, but it can be difficult
|
||||
to measure. Moreover, it can sometimes lead to faster search times for bigger
|
||||
automata, since it could be the difference between more parts of the automaton
|
||||
staying in the CPU cache or not.
|
||||
|
||||
One other trick for DFAs employed by this crate is the notion of premultiplying
|
||||
state identifiers. Specifically, the normal way to compute the next transition
|
||||
in a DFA is via the following (assuming that the transition table is laid out
|
||||
sequentially in memory, in row-major order, where the rows are states):
|
||||
|
||||
next_state_id = dfa.transitions[current_state_id * 256 + current_byte]
|
||||
|
||||
However, since the value `256` is a fixed constant, we can actually premultiply
|
||||
the state identifiers in the table when we build the table initially. Then, the
|
||||
next transition computation simply becomes:
|
||||
|
||||
next_state_id = dfa.transitions[current_state_id + current_byte]
|
||||
|
||||
This doesn't seem like much, but when this is being executed for every byte of
|
||||
input that you're searching, saving that extra multiplication instruction can
|
||||
add up.
|
||||
|
||||
The same optimization works even when equivalence classes are enabled, as
|
||||
described above. The only difference is that the premultiplication is by the
|
||||
total number of equivalence classes instead of 256.
|
||||
|
||||
There isn't much downside to premultiplying state identifiers, other than the
|
||||
fact that you may need to choose a bigger integer representation than you would
|
||||
otherwise. For example, if you don't premultiply state identifiers, then an
|
||||
automaton that uses `u8` as a state identifier can hold up to 256 states.
|
||||
However, if they are premultiplied, then it can only hold up to
|
||||
`floor(256 / len(alphabet))` states. Thus premultiplication impacts how compact
|
||||
your DFA can be. In practice, it's pretty rare to use `u8` as a state
|
||||
identifier, so premultiplication is usually a good thing to do.
|
||||
|
||||
Both equivalence classes and premultiplication are tuneable parameters via the
|
||||
`AhoCorasickBuilder` type, and both are enabled by default.
|
||||
|
||||
|
||||
# Match semantics
|
||||
|
||||
One of the more interesting things about this implementation of Aho-Corasick
|
||||
that (as far as this author knows) separates it from other implementations, is
|
||||
that it natively supports leftmost-first and leftmost-longest match semantics.
|
||||
Briefly, match semantics refer to the decision procedure by which searching
|
||||
will disambiguate matches when there are multiple to choose from:
|
||||
|
||||
* **standard** match semantics emits matches as soon as they are detected by
|
||||
the automaton. This is typically equivalent to the textbook non-overlapping
|
||||
formulation of Aho-Corasick.
|
||||
* **leftmost-first** match semantics means that 1) the next match is the match
|
||||
starting at the leftmost position and 2) among multiple matches starting at
|
||||
the same leftmost position, the match corresponding to the pattern provided
|
||||
first by the caller is reported.
|
||||
* **leftmost-longest** is like leftmost-first, except when there are multiple
|
||||
matches starting at the same leftmost position, the pattern corresponding to
|
||||
the longest match is returned.
|
||||
|
||||
(The crate API documentation discusses these differences, with examples, in
|
||||
more depth on the `MatchKind` type.)
|
||||
|
||||
The reason why supporting these match semantics is important is because it
|
||||
gives the user more control over the match procedure. For example,
|
||||
leftmost-first permits users to implement match priority by simply putting the
|
||||
higher priority patterns first. Leftmost-longest, on the other hand, permits
|
||||
finding the longest possible match, which might be useful when trying to find
|
||||
words matching a dictionary. Additionally, regex engines often want to use
|
||||
Aho-Corasick as an optimization when searching for an alternation of literals.
|
||||
In order to preserve correct match semantics, regex engines typically can't use
|
||||
the standard textbook definition directly, since regex engines will implement
|
||||
either leftmost-first (Perl-like) or leftmost-longest (POSIX) match semantics.
|
||||
|
||||
Supporting leftmost semantics requires a couple key changes:
|
||||
|
||||
* Constructing the Aho-Corasick automaton changes a bit in both how the trie is
|
||||
constructed and how failure transitions are found. Namely, only a subset of
|
||||
the failure transitions are added. Specifically, only the failure transitions
|
||||
that either do not occur after a match or do occur after a match but preserve
|
||||
that match are kept. (More details on this can be found in `src/nfa.rs`.)
|
||||
* The search algorithm changes slightly. Since we are looking for the leftmost
|
||||
match, we cannot quit as soon as a match is detected. Instead, after a match
|
||||
is detected, we must keep searching until either the end of the input or
|
||||
until a dead state is seen. (Dead states are not used for standard match
|
||||
semantics. Dead states mean that searching should stop after a match has been
|
||||
found.)
|
||||
|
||||
Other implementations of Aho-Corasick do support leftmost match semantics, but
|
||||
they do it with more overhead at search time, or even worse, with a queue of
|
||||
matches and sophisticated hijinks to disambiguate the matches. While our
|
||||
construction algorithm becomes a bit more complicated, the correct match
|
||||
semantics fall out from the structure of the automaton itself.
|
||||
|
||||
|
||||
# Overlapping matches
|
||||
|
||||
One of the nice properties of an Aho-Corasick automaton is that it can report
|
||||
all possible matches, even when they overlap with one another. In this mode,
|
||||
the match semantics don't matter, since all possible matches are reported.
|
||||
Overlapping searches work just like regular searches, except the state
|
||||
identifier at which the previous search left off is carried over to the next
|
||||
search, so that it can pick up where it left off. If there are additional
|
||||
matches at that state, then they are reported before resuming the search.
|
||||
|
||||
Enabling leftmost-first or leftmost-longest match semantics causes the
|
||||
automaton to use a subset of all failure transitions, which means that
|
||||
overlapping searches cannot be used. Therefore, if leftmost match semantics are
|
||||
used, attempting to do an overlapping search will panic. Thus, to get
|
||||
overlapping searches, the caller must use the default standard match semantics.
|
||||
This behavior was chosen because there are only two alternatives, which were
|
||||
deemed worse:
|
||||
|
||||
* Compile two automatons internally, one for standard semantics and one for
|
||||
the semantics requested by the caller (if not standard).
|
||||
* Create a new type, distinct from the `AhoCorasick` type, which has different
|
||||
capabilities based on the configuration options.
|
||||
|
||||
The first is untenable because of the amount of memory used by the automaton.
|
||||
The second increases the complexity of the API too much by adding too many
|
||||
types that do similar things. It is conceptually much simpler to keep all
|
||||
searching isolated to a single type. Callers may query whether the automaton
|
||||
supports overlapping searches via the `AhoCorasick::supports_overlapping`
|
||||
method.
|
||||
|
||||
|
||||
# Stream searching
|
||||
|
||||
Since Aho-Corasick is an automaton, it is possible to do partial searches on
|
||||
partial parts of the haystack, and then resume that search on subsequent pieces
|
||||
of the haystack. This is useful when the haystack you're trying to search is
|
||||
not stored contiguously in memory, or if one does not want to read the entire
|
||||
haystack into memory at once.
|
||||
|
||||
Currently, only standard semantics are supported for stream searching. This is
|
||||
some of the more complicated code in this crate, and is something I would very
|
||||
much like to improve. In particular, it currently has the restriction that it
|
||||
must buffer at least enough of the haystack in memory in order to fit the
|
||||
longest possible match. The difficulty in getting stream searching right is
|
||||
that the implementation choices (such as the buffer size) often impact what the
|
||||
API looks like and what it's allowed to do.
|
||||
|
||||
|
||||
# Prefilters
|
||||
|
||||
In some cases, Aho-Corasick is not the fastest way to find matches containing
|
||||
multiple patterns. Sometimes, the search can be accelerated using highly
|
||||
optimized SIMD routines. For example, consider searching the following
|
||||
patterns:
|
||||
|
||||
Sherlock
|
||||
Moriarty
|
||||
Watson
|
||||
|
||||
It is plausible that it would be much faster to quickly look for occurrences of
|
||||
the leading bytes, `S`, `M` or `W`, before trying to start searching via the
|
||||
automaton. Indeed, this is exactly what this crate will do.
|
||||
|
||||
When there are more than three distinct starting bytes, then this crate will
|
||||
look for three distinct bytes occurring at any position in the patterns, while
|
||||
preferring bytes that are heuristically determined to be rare over others. For
|
||||
example:
|
||||
|
||||
Abuzz
|
||||
Sanchez
|
||||
Vasquez
|
||||
Topaz
|
||||
Waltz
|
||||
|
||||
Here, we have more than 3 distinct starting bytes, but all of the patterns
|
||||
contain `z`, which is typically a rare byte. In this case, the prefilter will
|
||||
scan for `z`, back up a bit, and then execute the Aho-Corasick automaton.
|
||||
|
||||
If all of that fails, then a packed multiple substring algorithm will be
|
||||
attempted. Currently, the only algorithm available for this is Teddy, but more
|
||||
may be added in the future. Teddy is unlike the above prefilters in that it
|
||||
confirms its own matches, so when Teddy is active, it might not be necessary
|
||||
for Aho-Corasick to run at all. (See `Automaton::leftmost_find_at_no_state_imp`
|
||||
in `src/automaton.rs`.) However, the current Teddy implementation only works
|
||||
in `x86_64` and when SSSE3 or AVX2 are available, and moreover, only works
|
||||
_well_ when there are a small number of patterns (say, less than 100). Teddy
|
||||
also requires the haystack to be of a certain length (more than 16-34 bytes).
|
||||
When the haystack is shorter than that, Rabin-Karp is used instead. (See
|
||||
`src/packed/rabinkarp.rs`.)
|
||||
|
||||
There is a more thorough description of Teddy at
|
||||
[`src/packed/teddy/README.md`](src/packed/teddy/README.md).
|
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Andrew Gallant
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -0,0 +1,184 @@
|
|||
aho-corasick
|
||||
============
|
||||
A library for finding occurrences of many patterns at once with SIMD
|
||||
acceleration in some cases. This library provides multiple pattern
|
||||
search principally through an implementation of the
|
||||
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
|
||||
which builds a finite state machine for executing searches in linear time.
|
||||
Features include case insensitive matching, overlapping matches and search &
|
||||
replace in streams.
|
||||
|
||||
[![Linux build status](https://api.travis-ci.org/BurntSushi/aho-corasick.svg)](https://travis-ci.org/BurntSushi/aho-corasick)
|
||||
[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/aho-corasick?svg=true)](https://ci.appveyor.com/project/BurntSushi/aho-corasick)
|
||||
[![](http://meritbadge.herokuapp.com/aho-corasick)](https://crates.io/crates/aho-corasick)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
|
||||
### Documentation
|
||||
|
||||
https://docs.rs/aho-corasick
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
aho-corasick = "0.7"
|
||||
```
|
||||
|
||||
and this to your crate root (if you're using Rust 2015):
|
||||
|
||||
```rust
|
||||
extern crate aho_corasick;
|
||||
```
|
||||
|
||||
|
||||
### Example: basic searching
|
||||
|
||||
This example shows how to search for occurrences of multiple patterns
|
||||
simultaneously. Each match includes the pattern that matched along with the
|
||||
byte offsets of the match.
|
||||
|
||||
```rust
|
||||
use aho_corasick::AhoCorasick;
|
||||
|
||||
let patterns = &["apple", "maple", "Snapple"];
|
||||
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||
|
||||
let ac = AhoCorasick::new(patterns);
|
||||
let mut matches = vec![];
|
||||
for mat in ac.find_iter(haystack) {
|
||||
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||
}
|
||||
assert_eq!(matches, vec![
|
||||
(1, 13, 18),
|
||||
(0, 28, 33),
|
||||
(2, 43, 50),
|
||||
]);
|
||||
```
|
||||
|
||||
|
||||
### Example: case insensitivity
|
||||
|
||||
This is like the previous example, but matches `Snapple` case insensitively
|
||||
using `AhoCorasickBuilder`:
|
||||
|
||||
```rust
|
||||
use aho_corasick::AhoCorasickBuilder;
|
||||
|
||||
let patterns = &["apple", "maple", "snapple"];
|
||||
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||
|
||||
let ac = AhoCorasickBuilder::new()
|
||||
.ascii_case_insensitive(true)
|
||||
.build(patterns);
|
||||
let mut matches = vec![];
|
||||
for mat in ac.find_iter(haystack) {
|
||||
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||
}
|
||||
assert_eq!(matches, vec![
|
||||
(1, 13, 18),
|
||||
(0, 28, 33),
|
||||
(2, 43, 50),
|
||||
]);
|
||||
```
|
||||
|
||||
|
||||
### Example: replacing matches in a stream
|
||||
|
||||
This example shows how to execute a search and replace on a stream without
|
||||
loading the entire stream into memory first.
|
||||
|
||||
```rust
|
||||
use aho_corasick::AhoCorasick;
|
||||
|
||||
# fn example() -> Result<(), ::std::io::Error> {
|
||||
let patterns = &["fox", "brown", "quick"];
|
||||
let replace_with = &["sloth", "grey", "slow"];
|
||||
|
||||
// In a real example, these might be `std::fs::File`s instead. All you need to
|
||||
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
|
||||
let rdr = "The quick brown fox.";
|
||||
let mut wtr = vec![];
|
||||
|
||||
let ac = AhoCorasick::new(patterns);
|
||||
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
|
||||
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
|
||||
# Ok(()) }; example().unwrap()
|
||||
```
|
||||
|
||||
|
||||
### Example: finding the leftmost first match
|
||||
|
||||
In the textbook description of Aho-Corasick, its formulation is typically
|
||||
structured such that it reports all possible matches, even when they overlap
|
||||
with another. In many cases, overlapping matches may not be desired, such as
|
||||
the case of finding all successive non-overlapping matches like you might with
|
||||
a standard regular expression.
|
||||
|
||||
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
|
||||
this doesn't always work in the expected way, since it will report matches as
|
||||
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
|
||||
against the text `Samwise`. Most regex engines (that are Perl-like, or
|
||||
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
|
||||
algorithm modified for reporting non-overlapping matches will report `Sam`.
|
||||
|
||||
A novel contribution of this library is the ability to change the match
|
||||
semantics of Aho-Corasick (without additional search time overhead) such that
|
||||
`Samwise` is reported instead. For example, here's the standard approach:
|
||||
|
||||
```rust
|
||||
use aho_corasick::AhoCorasick;
|
||||
|
||||
let patterns = &["Samwise", "Sam"];
|
||||
let haystack = "Samwise";
|
||||
|
||||
let ac = AhoCorasick::new(patterns);
|
||||
let mat = ac.find(haystack).expect("should have a match");
|
||||
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
|
||||
```
|
||||
|
||||
And now here's the leftmost-first version, which matches how a Perl-like
|
||||
regex will work:
|
||||
|
||||
```rust
|
||||
use aho_corasick::{AhoCorasickBuilder, MatchKind};
|
||||
|
||||
let patterns = &["Samwise", "Sam"];
|
||||
let haystack = "Samwise";
|
||||
|
||||
let ac = AhoCorasickBuilder::new()
|
||||
.match_kind(MatchKind::LeftmostFirst)
|
||||
.build(patterns);
|
||||
let mat = ac.find(haystack).expect("should have a match");
|
||||
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
|
||||
```
|
||||
|
||||
In addition to leftmost-first semantics, this library also supports
|
||||
leftmost-longest semantics, which match the POSIX behavior of a regular
|
||||
expression alternation. See `MatchKind` in the docs for more details.
|
||||
|
||||
|
||||
### Minimum Rust version policy
|
||||
|
||||
This crate's minimum supported `rustc` version is `1.28.0`.
|
||||
|
||||
In general, this crate will be conservative with respect to the minimum
supported version of Rust. Specifically, it will follow the `regex` crate's
policy, since `regex` is an important dependent.
|
||||
|
||||
|
||||
### Future work
|
||||
|
||||
Here are some plans for the future:
|
||||
|
||||
* Assuming the current API is sufficient, I'd like to commit to it and release
|
||||
a `1.0` version of this crate some time in the next 6-12 months.
|
||||
* Support stream searching with leftmost match semantics. Currently, only
|
||||
standard match semantics are supported. Getting this right seems possible,
|
||||
but is tricky since the match state needs to be propagated through multiple
|
||||
searches. (With standard semantics, as soon as a match is seen the search
|
||||
ends.)
|
|
@ -0,0 +1,24 @@
|
|||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
|
@ -0,0 +1,2 @@
|
|||
max_width = 79
|
||||
use_small_heuristics = "max"
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,589 @@
|
|||
use ahocorasick::MatchKind;
|
||||
use prefilter::{self, Candidate, Prefilter, PrefilterState};
|
||||
use state_id::{dead_id, fail_id, StateID};
|
||||
use Match;
|
||||
|
||||
// NOTE: This trait essentially started as a copy of the same trait from
// regex-automata, with some wording changed since we use this trait for
// NFAs in addition to DFAs in this crate. Additionally, we do not export
// this trait. It's only used internally to reduce code duplication. The
// regex-automata crate needs to expose it because its Regex type is generic
// over implementations of this trait. In this crate, we encapsulate everything
// behind the AhoCorasick type.
//
// This trait is a bit of a mess, but it's not quite clear how to fix it.
// Basically, there are several competing concerns:
//
// * We need performance, so everything effectively needs to get monomorphized.
// * There are several variations on searching Aho-Corasick automatons:
//   overlapping, standard and leftmost. Overlapping and standard are somewhat
//   combined together below, but there is no real way to combine standard with
//   leftmost. Namely, leftmost requires continuing a search even after a match
//   is found, in order to correctly disambiguate a match.
// * On top of that, *sometimes* callers want to know which state the automaton
//   is in after searching. This is principally useful for overlapping and
//   stream searches. However, when callers don't care about this, we really
//   do not want to be forced to compute it, since it sometimes requires extra
//   work. Thus, there are effectively two copies of leftmost searching: one
//   for tracking the state ID and one that doesn't. We should ideally do the
//   same for standard searching, but my sanity stopped me.
|
||||
|
||||
/// A trait describing the interface of an Aho-Corasick finite state machine.
|
||||
///
|
||||
/// Every automaton has exactly one fail state, one dead state and exactly one
|
||||
/// start state. Generally, these correspond to the first, second and third
|
||||
/// states, respectively. The failure state is always treated as a sentinel.
|
||||
/// That is, no correct Aho-Corasick automaton will ever transition into the
|
||||
/// fail state. The dead state, however, can be transitioned into, but only
|
||||
/// when leftmost-first or leftmost-longest match semantics are enabled and
|
||||
/// only when at least one match has been observed.
|
||||
///
|
||||
/// Every automaton also has one or more match states, such that
|
||||
/// `Automaton::is_match_state_unchecked(id)` returns `true` if and only if
|
||||
/// `id` corresponds to a match state.
|
||||
pub trait Automaton {
|
||||
/// The representation used for state identifiers in this automaton.
|
||||
///
|
||||
/// Typically, this is one of `u8`, `u16`, `u32`, `u64` or `usize`.
|
||||
type ID: StateID;
|
||||
|
||||
/// The type of matching that should be done.
|
||||
fn match_kind(&self) -> &MatchKind;
|
||||
|
||||
/// Returns true if and only if this automaton uses anchored searches.
|
||||
fn anchored(&self) -> bool;
|
||||
|
||||
/// An optional prefilter for quickly skipping to the next candidate match.
|
||||
/// A prefilter must report at least every match, although it may report
|
||||
/// positions that do not correspond to a match. That is, it must not allow
|
||||
/// false negatives, but can allow false positives.
|
||||
///
|
||||
/// Currently, a prefilter only runs when the automaton is in the start
|
||||
/// state. That is, the position reported by a prefilter should always
|
||||
/// correspond to the start of a potential match.
|
||||
fn prefilter(&self) -> Option<&dyn Prefilter>;
|
||||
|
||||
/// Return the identifier of this automaton's start state.
|
||||
fn start_state(&self) -> Self::ID;
|
||||
|
||||
/// Returns true if and only if the given state identifier refers to a
|
||||
/// valid state.
|
||||
fn is_valid(&self, id: Self::ID) -> bool;
|
||||
|
||||
/// Returns true if and only if the given identifier corresponds to a match
|
||||
/// state.
|
||||
///
|
||||
/// The state ID given must be valid, or else implementors may panic.
|
||||
fn is_match_state(&self, id: Self::ID) -> bool;
|
||||
|
||||
/// Returns true if and only if the given identifier corresponds to a state
|
||||
/// that is either the dead state or a match state.
|
||||
///
|
||||
/// Depending on the implementation of the automaton, this routine can
|
||||
/// be used to save a branch in the core matching loop. Nevertheless,
|
||||
/// `is_match_state(id) || id == dead_id()` is always a valid
|
||||
/// implementation. Indeed, this is the default implementation.
|
||||
///
|
||||
/// The state ID given must be valid, or else implementors may panic.
|
||||
fn is_match_or_dead_state(&self, id: Self::ID) -> bool {
|
||||
id == dead_id() || self.is_match_state(id)
|
||||
}
|
||||
|
||||
/// If the given state is a match state, return the match corresponding
|
||||
/// to the given match index. `end` must be the ending position of the
|
||||
/// detected match. If no match exists or if `match_index` exceeds the
|
||||
/// number of matches in this state, then `None` is returned.
|
||||
///
|
||||
/// The state ID given must be valid, or else implementors may panic.
|
||||
///
|
||||
/// If the given state ID is correct and if the `match_index` is less than
|
||||
/// the number of matches for that state, then this is guaranteed to return
|
||||
/// a match.
|
||||
fn get_match(
|
||||
&self,
|
||||
id: Self::ID,
|
||||
match_index: usize,
|
||||
end: usize,
|
||||
) -> Option<Match>;
|
||||
|
||||
/// Returns the number of matches for the given state. If the given state
|
||||
/// is not a match state, then this returns 0.
|
||||
///
|
||||
/// The state ID given must be valid, or else implementors must panic.
|
||||
fn match_count(&self, id: Self::ID) -> usize;
|
||||
|
||||
/// Given the current state that this automaton is in and the next input
|
||||
/// byte, this method returns the identifier of the next state. The
|
||||
/// identifier returned must always be valid and may never correspond to
|
||||
/// the fail state. The returned identifier may, however, point to the
|
||||
/// dead state.
|
||||
///
|
||||
/// This is not safe so that implementors may look up the next state
|
||||
/// without memory safety checks such as bounds checks. As such, callers
|
||||
/// must ensure that the given identifier corresponds to a valid automaton
|
||||
/// state. Implementors must, in turn, ensure that this routine is safe for
|
||||
/// all valid state identifiers and for all possible `u8` values.
|
||||
unsafe fn next_state_unchecked(
|
||||
&self,
|
||||
current: Self::ID,
|
||||
input: u8,
|
||||
) -> Self::ID;
|
||||
|
||||
/// Like next_state_unchecked, but debug_asserts that the underlying
|
||||
/// implementation never returns a `fail_id()` for the next state.
|
||||
unsafe fn next_state_unchecked_no_fail(
|
||||
&self,
|
||||
current: Self::ID,
|
||||
input: u8,
|
||||
) -> Self::ID {
|
||||
let next = self.next_state_unchecked(current, input);
|
||||
// We should never see a transition to the failure state.
|
||||
debug_assert!(
|
||||
next != fail_id(),
|
||||
"automaton should never return fail_id for next state"
|
||||
);
|
||||
next
|
||||
}
|
||||
|
||||
/// Execute a search using standard match semantics.
|
||||
///
|
||||
/// This can be used even when the automaton was constructed with leftmost
|
||||
/// match semantics when you want to find the earliest possible match. This
|
||||
/// can also be used as part of an overlapping search implementation.
|
||||
///
|
||||
/// N.B. This does not report a match if `state_id` is given as a matching
|
||||
/// state. As such, this should not be used directly.
|
||||
#[inline(always)]
|
||||
fn standard_find_at(
|
||||
&self,
|
||||
prestate: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
state_id: &mut Self::ID,
|
||||
) -> Option<Match> {
|
||||
if let Some(pre) = self.prefilter() {
|
||||
self.standard_find_at_imp(
|
||||
prestate,
|
||||
Some(pre),
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
)
|
||||
} else {
|
||||
self.standard_find_at_imp(prestate, None, haystack, at, state_id)
|
||||
}
|
||||
}
|
||||
|
||||
// It's important for this to always be inlined. Namely, it's only caller
|
||||
// is standard_find_at, and the inlining should remove the case analysis
|
||||
// for prefilter scanning when there is no prefilter available.
|
||||
#[inline(always)]
|
||||
fn standard_find_at_imp(
|
||||
&self,
|
||||
prestate: &mut PrefilterState,
|
||||
prefilter: Option<&dyn Prefilter>,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
state_id: &mut Self::ID,
|
||||
) -> Option<Match> {
|
||||
// This is necessary for guaranteeing a safe API, since we use the
|
||||
// state ID below in a function that exhibits UB if called with an
|
||||
// invalid state ID.
|
||||
assert!(
|
||||
self.is_valid(*state_id),
|
||||
"{} is not a valid state ID",
|
||||
state_id.to_usize()
|
||||
);
|
||||
unsafe {
|
||||
let start = haystack.as_ptr();
|
||||
let end = haystack[haystack.len()..].as_ptr();
|
||||
let mut ptr = haystack[at..].as_ptr();
|
||||
while ptr < end {
|
||||
if let Some(pre) = prefilter {
|
||||
let at = ptr as usize - start as usize;
|
||||
if prestate.is_effective(at)
|
||||
&& *state_id == self.start_state()
|
||||
{
|
||||
let c = prefilter::next(prestate, pre, haystack, at)
|
||||
.into_option();
|
||||
match c {
|
||||
None => return None,
|
||||
Some(i) => {
|
||||
ptr = start.offset(i as isize);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// SAFETY: next_state is safe for all possible u8 values,
|
||||
// so the only thing we're concerned about is the validity
|
||||
// of `state_id`. `state_id` either comes from the caller
|
||||
// (in which case, we assert above that it is valid), or it
|
||||
// comes from the return value of next_state, which is also
|
||||
// guaranteed to be valid.
|
||||
*state_id = self.next_state_unchecked_no_fail(*state_id, *ptr);
|
||||
ptr = ptr.offset(1);
|
||||
// This routine always quits immediately after seeing a
|
||||
// match, and since dead states can only come after seeing
|
||||
// a match, seeing a dead state here is impossible. (Unless
|
||||
// we have an anchored automaton, in which case, dead states
|
||||
// are used to stop a search.)
|
||||
debug_assert!(
|
||||
*state_id != dead_id() || self.anchored(),
|
||||
"standard find should never see a dead state"
|
||||
);
|
||||
|
||||
if self.is_match_or_dead_state(*state_id) {
|
||||
return if *state_id == dead_id() {
|
||||
None
|
||||
} else {
|
||||
let end = ptr as usize - start as usize;
|
||||
self.get_match(*state_id, 0, end)
|
||||
};
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute a search using leftmost (either first or longest) match
|
||||
/// semantics.
|
||||
///
|
||||
/// The principle difference between searching with standard semantics and
|
||||
/// searching with leftmost semantics is that leftmost searching will
|
||||
/// continue searching even after a match has been found. Once a match
|
||||
/// is found, the search does not stop until either the haystack has been
|
||||
/// exhausted or a dead state is observed in the automaton. (Dead states
|
||||
/// only exist in automatons constructed with leftmost semantics.) That is,
|
||||
/// we rely on the construction of the automaton to tell us when to quit.
|
||||
#[inline(never)]
|
||||
fn leftmost_find_at(
|
||||
&self,
|
||||
prestate: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
state_id: &mut Self::ID,
|
||||
) -> Option<Match> {
|
||||
if let Some(pre) = self.prefilter() {
|
||||
self.leftmost_find_at_imp(
|
||||
prestate,
|
||||
Some(pre),
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
)
|
||||
} else {
|
||||
self.leftmost_find_at_imp(prestate, None, haystack, at, state_id)
|
||||
}
|
||||
}
|
||||
|
||||
// It's important for this to always be inlined. Namely, it's only caller
|
||||
// is leftmost_find_at, and the inlining should remove the case analysis
|
||||
// for prefilter scanning when there is no prefilter available.
|
||||
#[inline(always)]
|
||||
fn leftmost_find_at_imp(
|
||||
&self,
|
||||
prestate: &mut PrefilterState,
|
||||
prefilter: Option<&dyn Prefilter>,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
state_id: &mut Self::ID,
|
||||
) -> Option<Match> {
|
||||
debug_assert!(self.match_kind().is_leftmost());
|
||||
// This is necessary for guaranteeing a safe API, since we use the
|
||||
// state ID below in a function that exhibits UB if called with an
|
||||
// invalid state ID.
|
||||
assert!(
|
||||
self.is_valid(*state_id),
|
||||
"{} is not a valid state ID",
|
||||
state_id.to_usize()
|
||||
);
|
||||
if self.anchored() && at > 0 && *state_id == self.start_state() {
|
||||
return None;
|
||||
}
|
||||
unsafe {
|
||||
let start = haystack.as_ptr();
|
||||
let end = haystack[haystack.len()..].as_ptr();
|
||||
let mut ptr = haystack[at..].as_ptr();
|
||||
|
||||
let mut last_match = self.get_match(*state_id, 0, at);
|
||||
while ptr < end {
|
||||
if let Some(pre) = prefilter {
|
||||
let at = ptr as usize - start as usize;
|
||||
if prestate.is_effective(at)
|
||||
&& *state_id == self.start_state()
|
||||
{
|
||||
let c = prefilter::next(prestate, pre, haystack, at)
|
||||
.into_option();
|
||||
match c {
|
||||
None => return None,
|
||||
Some(i) => {
|
||||
ptr = start.offset(i as isize);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// SAFETY: next_state is safe for all possible u8 values,
|
||||
// so the only thing we're concerned about is the validity
|
||||
// of `state_id`. `state_id` either comes from the caller
|
||||
// (in which case, we assert above that it is valid), or it
|
||||
// comes from the return value of next_state, which is also
|
||||
// guaranteed to be valid.
|
||||
*state_id = self.next_state_unchecked_no_fail(*state_id, *ptr);
|
||||
ptr = ptr.offset(1);
|
||||
if self.is_match_or_dead_state(*state_id) {
|
||||
if *state_id == dead_id() {
|
||||
// The only way to enter into a dead state is if a
|
||||
// match has been found, so we assert as much. This
|
||||
// is different from normal automata, where you might
|
||||
// enter a dead state if you know a subsequent match
|
||||
// will never be found (regardless of whether a match
|
||||
// has already been found). For Aho-Corasick, it is
|
||||
// built so that we can match at any position, so the
|
||||
// possibility of a match always exists.
|
||||
//
|
||||
// (Unless we have an anchored automaton, in which
|
||||
// case, dead states are used to stop a search.)
|
||||
debug_assert!(
|
||||
last_match.is_some() || self.anchored(),
|
||||
"failure state should only be seen after match"
|
||||
);
|
||||
return last_match;
|
||||
}
|
||||
let end = ptr as usize - start as usize;
|
||||
last_match = self.get_match(*state_id, 0, end);
|
||||
}
|
||||
}
|
||||
last_match
|
||||
}
|
||||
}
|
||||
|
||||
/// This is like leftmost_find_at, but does not need to track a caller
|
||||
/// provided state id. In other words, the only output of this routine is a
|
||||
/// match, if one exists.
|
||||
///
|
||||
/// It is regrettable that we need to effectively copy a chunk of
|
||||
/// implementation twice, but when we don't need to track the state ID, we
|
||||
/// can allow the prefilter to report matches immediately without having
|
||||
/// to re-confirm them with the automaton. The re-confirmation step is
|
||||
/// necessary in leftmost_find_at because tracing through the automaton is
|
||||
/// the only way to correctly set the state ID. (Perhaps an alternative
|
||||
/// would be to keep a map from pattern ID to matching state ID, but that
|
||||
/// complicates the code and still doesn't permit us to defer to the
|
||||
/// prefilter entirely when possible.)
|
||||
///
|
||||
/// I did try a few things to avoid the code duplication here, but nothing
|
||||
/// optimized as well as this approach. (In microbenchmarks, there was
|
||||
/// about a 25% difference.)
|
||||
#[inline(never)]
|
||||
fn leftmost_find_at_no_state(
|
||||
&self,
|
||||
prestate: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Option<Match> {
|
||||
if let Some(pre) = self.prefilter() {
|
||||
self.leftmost_find_at_no_state_imp(
|
||||
prestate,
|
||||
Some(pre),
|
||||
haystack,
|
||||
at,
|
||||
)
|
||||
} else {
|
||||
self.leftmost_find_at_no_state_imp(prestate, None, haystack, at)
|
||||
}
|
||||
}
|
||||
|
||||
// It's important for this to always be inlined. Namely, it's only caller
|
||||
// is leftmost_find_at_no_state, and the inlining should remove the case
|
||||
// analysis for prefilter scanning when there is no prefilter available.
|
||||
#[inline(always)]
|
||||
fn leftmost_find_at_no_state_imp(
|
||||
&self,
|
||||
prestate: &mut PrefilterState,
|
||||
prefilter: Option<&dyn Prefilter>,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Option<Match> {
|
||||
debug_assert!(self.match_kind().is_leftmost());
|
||||
if self.anchored() && at > 0 {
|
||||
return None;
|
||||
}
|
||||
// If our prefilter handles confirmation of matches 100% of the
|
||||
// time, and since we don't need to track state IDs, we can avoid
|
||||
// Aho-Corasick completely.
|
||||
if let Some(pre) = prefilter {
|
||||
// We should never have a prefilter during an anchored search.
|
||||
debug_assert!(!self.anchored());
|
||||
if !pre.reports_false_positives() {
|
||||
return match pre.next_candidate(prestate, haystack, at) {
|
||||
Candidate::None => None,
|
||||
Candidate::Match(m) => Some(m),
|
||||
Candidate::PossibleStartOfMatch(_) => unreachable!(),
|
||||
};
|
||||
}
|
||||
}
|
||||
let mut state_id = self.start_state();
|
||||
unsafe {
|
||||
let start = haystack.as_ptr();
|
||||
let end = haystack[haystack.len()..].as_ptr();
|
||||
let mut ptr = haystack[at..].as_ptr();
|
||||
|
||||
let mut last_match = self.get_match(state_id, 0, at);
|
||||
while ptr < end {
|
||||
if let Some(pre) = prefilter {
|
||||
let at = ptr as usize - start as usize;
|
||||
if prestate.is_effective(at)
|
||||
&& state_id == self.start_state()
|
||||
{
|
||||
match prefilter::next(prestate, pre, haystack, at) {
|
||||
Candidate::None => return None,
|
||||
// Since we aren't tracking a state ID, we can
|
||||
// quit early once we know we have a match.
|
||||
Candidate::Match(m) => return Some(m),
|
||||
Candidate::PossibleStartOfMatch(i) => {
|
||||
ptr = start.offset(i as isize);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// SAFETY: next_state is safe for all possible u8 values,
|
||||
// so the only thing we're concerned about is the validity
|
||||
// of `state_id`. `state_id` either comes from the caller
|
||||
// (in which case, we assert above that it is valid), or it
|
||||
// comes from the return value of next_state, which is also
|
||||
// guaranteed to be valid.
|
||||
state_id = self.next_state_unchecked_no_fail(state_id, *ptr);
|
||||
ptr = ptr.offset(1);
|
||||
if self.is_match_or_dead_state(state_id) {
|
||||
if state_id == dead_id() {
|
||||
// The only way to enter into a dead state is if a
|
||||
// match has been found, so we assert as much. This
|
||||
// is different from normal automata, where you might
|
||||
// enter a dead state if you know a subsequent match
|
||||
// will never be found (regardless of whether a match
|
||||
// has already been found). For Aho-Corasick, it is
|
||||
// built so that we can match at any position, so the
|
||||
// possibility of a match always exists.
|
||||
//
|
||||
// (Unless we have an anchored automaton, in which
|
||||
// case, dead states are used to stop a search.)
|
||||
debug_assert!(
|
||||
last_match.is_some() || self.anchored(),
|
||||
"failure state should only be seen after match"
|
||||
);
|
||||
return last_match;
|
||||
}
|
||||
let end = ptr as usize - start as usize;
|
||||
last_match = self.get_match(state_id, 0, end);
|
||||
}
|
||||
}
|
||||
last_match
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute an overlapping search.
|
||||
///
|
||||
/// When executing an overlapping match, the previous state ID in addition
|
||||
/// to the previous match index should be given. If there are more matches
|
||||
/// at the given state, then the match is reported and the given index is
|
||||
/// incremented.
|
||||
#[inline(always)]
|
||||
fn overlapping_find_at(
|
||||
&self,
|
||||
prestate: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
state_id: &mut Self::ID,
|
||||
match_index: &mut usize,
|
||||
) -> Option<Match> {
|
||||
if self.anchored() && at > 0 && *state_id == self.start_state() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let match_count = self.match_count(*state_id);
|
||||
if *match_index < match_count {
|
||||
// This is guaranteed to return a match since
|
||||
// match_index < match_count.
|
||||
let result = self.get_match(*state_id, *match_index, at);
|
||||
debug_assert!(result.is_some(), "must be a match");
|
||||
*match_index += 1;
|
||||
return result;
|
||||
}
|
||||
|
||||
*match_index = 0;
|
||||
match self.standard_find_at(prestate, haystack, at, state_id) {
|
||||
None => None,
|
||||
Some(m) => {
|
||||
*match_index = 1;
|
||||
Some(m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the earliest match found. This returns as soon as we know that
|
||||
/// we have a match. As such, this does not necessarily correspond to the
|
||||
/// leftmost starting match, but rather, the leftmost position at which a
|
||||
/// match ends.
|
||||
#[inline(always)]
|
||||
fn earliest_find_at(
|
||||
&self,
|
||||
prestate: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
state_id: &mut Self::ID,
|
||||
) -> Option<Match> {
|
||||
if *state_id == self.start_state() {
|
||||
if self.anchored() && at > 0 {
|
||||
return None;
|
||||
}
|
||||
if let Some(m) = self.get_match(*state_id, 0, at) {
|
||||
return Some(m);
|
||||
}
|
||||
}
|
||||
self.standard_find_at(prestate, haystack, at, state_id)
|
||||
}
|
||||
|
||||
    /// A convenience function for finding the next match according to the
    /// match semantics of this automaton. For standard match semantics, this
    /// finds the earliest match. Otherwise, the leftmost match is found.
    #[inline(always)]
    fn find_at(
        &self,
        prestate: &mut PrefilterState,
        haystack: &[u8],
        at: usize,
        state_id: &mut Self::ID,
    ) -> Option<Match> {
        match *self.match_kind() {
            // Standard semantics: report a match as soon as one is seen.
            MatchKind::Standard => {
                self.earliest_find_at(prestate, haystack, at, state_id)
            }
            // Both leftmost variants share one search routine; presumably
            // the first/longest distinction is baked into the automaton at
            // construction time — confirm against the builder.
            MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => {
                self.leftmost_find_at(prestate, haystack, at, state_id)
            }
            // Variant exists only to keep the enum extensible; never
            // constructed.
            MatchKind::__Nonexhaustive => unreachable!(),
        }
    }
|
||||
|
||||
    /// Like find_at, but does not track state identifiers. This permits some
    /// optimizations when a prefilter that confirms its own matches is
    /// present.
    #[inline(always)]
    fn find_at_no_state(
        &self,
        prestate: &mut PrefilterState,
        haystack: &[u8],
        at: usize,
    ) -> Option<Match> {
        match *self.match_kind() {
            MatchKind::Standard => {
                // There is no specialized "no state" routine for standard
                // semantics, so use a throwaway start state instead.
                let mut state = self.start_state();
                self.earliest_find_at(prestate, haystack, at, &mut state)
            }
            MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => {
                self.leftmost_find_at_no_state(prestate, haystack, at)
            }
            // Variant exists only to keep the enum extensible; never
            // constructed.
            MatchKind::__Nonexhaustive => unreachable!(),
        }
    }
|
||||
}
|
|
@ -0,0 +1,128 @@
|
|||
use std::cmp;
|
||||
use std::io;
|
||||
use std::ptr;
|
||||
|
||||
/// The default buffer capacity that we use for the stream buffer.
const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB

/// A fairly simple roll buffer for supporting stream searches.
///
/// This buffer acts as a temporary place to store a fixed amount of data when
/// reading from a stream. Its central purpose is to allow "rolling" some
/// suffix of the data to the beginning of the buffer before refilling it with
/// more data from the stream. For example, let's say we are trying to match
/// "foobar" on a stream. When we report the match, we'd like to not only
/// report the correct offsets at which the match occurs, but also the matching
/// bytes themselves. So let's say our stream is a file with the following
/// contents: `test test foobar test test`. Now assume that we happen to read
/// the aforementioned file in two chunks: `test test foo` and `bar test test`.
/// Naively, it would not be possible to report a single contiguous `foobar`
/// match, but this roll buffer allows us to do that. Namely, after the second
/// read, the contents of the buffer should be `st foobar test test`, where the
/// search should ultimately resume immediately after `foo`. (The prefix `st `
/// is included because the roll buffer saves N bytes at the end of the buffer,
/// where N is the maximum possible length of a match.)
///
/// A lot of the logic for dealing with this is unfortunately split out between
/// this roll buffer and the `StreamChunkIter`.
#[derive(Debug)]
pub struct Buffer {
    /// The raw buffer contents. This has a fixed size and never increases.
    buf: Vec<u8>,
    /// The minimum size of the buffer, which is equivalent to the maximum
    /// possible length of a match. This corresponds to the amount that we
    /// roll to the front of the buffer before each refill.
    min: usize,
    /// The end of the contents of this buffer.
    end: usize,
}

impl Buffer {
    /// Create a new buffer for stream searching. The minimum buffer length
    /// given should be the size of the maximum possible match length.
    pub fn new(min_buffer_len: usize) -> Buffer {
        let min = cmp::max(1, min_buffer_len);
        // The minimum buffer amount is also the amount that we roll our
        // buffer in order to support incremental searching. To this end,
        // our actual capacity needs to be at least 1 byte bigger than our
        // minimum amount, otherwise we won't have any overlap. In actuality,
        // we want our buffer to be a bit bigger than that for performance
        // reasons, so we set a lower bound of `8 * min`.
        //
        // TODO: It would be good to find a way to test the streaming
        // implementation with the minimal buffer size.
        let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
        Buffer { buf: vec![0; capacity], min, end: 0 }
    }

    /// Return the contents of this buffer.
    #[inline]
    pub fn buffer(&self) -> &[u8] {
        &self.buf[..self.end]
    }

    /// Return the minimum size of the buffer. The only way a buffer may be
    /// smaller than this is if the stream itself contains less than the
    /// minimum buffer amount.
    #[inline]
    pub fn min_buffer_len(&self) -> usize {
        self.min
    }

    /// Return the total length of the contents in the buffer.
    #[inline]
    pub fn len(&self) -> usize {
        self.end
    }

    /// Return all free capacity in this buffer.
    fn free_buffer(&mut self) -> &mut [u8] {
        &mut self.buf[self.end..]
    }

    /// Refill the contents of this buffer by reading as much as possible into
    /// this buffer's free capacity. If no more bytes could be read, then this
    /// returns false. Otherwise, this reads until it has filled the buffer
    /// past the minimum amount.
    pub fn fill<R: io::Read>(&mut self, mut rdr: R) -> io::Result<bool> {
        let mut readany = false;
        loop {
            let readlen = rdr.read(self.free_buffer())?;
            if readlen == 0 {
                // EOF (or a full buffer): report whether any bytes at all
                // were read by this call.
                return Ok(readany);
            }
            readany = true;
            self.end += readlen;
            if self.len() >= self.min {
                return Ok(true);
            }
        }
    }

    /// Roll the contents of the buffer so that the suffix of this buffer is
    /// moved to the front and all other contents are dropped. The size of the
    /// suffix corresponds precisely to the minimum buffer length.
    ///
    /// This should only be called when the entire contents of this buffer have
    /// been searched.
    pub fn roll(&mut self) {
        let roll_start = self
            .end
            .checked_sub(self.min)
            .expect("buffer capacity should be bigger than minimum amount");
        let roll_len = self.min;

        // `copy_within` performs a (potentially overlapping) memmove, just
        // like the previous `ptr::copy`, but is entirely safe: it panics if
        // the source range is out of bounds instead of invoking UB.
        //
        // NOTE(review): `slice::copy_within` requires Rust 1.37+ — confirm
        // against this crate's MSRV before landing.
        self.buf.copy_within(roll_start..self.end, 0);
        self.end = roll_len;
    }
}
|
|
@ -0,0 +1,258 @@
|
|||
pub const BYTE_FREQUENCIES: [u8; 256] = [
|
||||
55, // '\x00'
|
||||
52, // '\x01'
|
||||
51, // '\x02'
|
||||
50, // '\x03'
|
||||
49, // '\x04'
|
||||
48, // '\x05'
|
||||
47, // '\x06'
|
||||
46, // '\x07'
|
||||
45, // '\x08'
|
||||
103, // '\t'
|
||||
242, // '\n'
|
||||
66, // '\x0b'
|
||||
67, // '\x0c'
|
||||
229, // '\r'
|
||||
44, // '\x0e'
|
||||
43, // '\x0f'
|
||||
42, // '\x10'
|
||||
41, // '\x11'
|
||||
40, // '\x12'
|
||||
39, // '\x13'
|
||||
38, // '\x14'
|
||||
37, // '\x15'
|
||||
36, // '\x16'
|
||||
35, // '\x17'
|
||||
34, // '\x18'
|
||||
33, // '\x19'
|
||||
56, // '\x1a'
|
||||
32, // '\x1b'
|
||||
31, // '\x1c'
|
||||
30, // '\x1d'
|
||||
29, // '\x1e'
|
||||
28, // '\x1f'
|
||||
255, // ' '
|
||||
148, // '!'
|
||||
164, // '"'
|
||||
149, // '#'
|
||||
136, // '$'
|
||||
160, // '%'
|
||||
155, // '&'
|
||||
173, // "'"
|
||||
221, // '('
|
||||
222, // ')'
|
||||
134, // '*'
|
||||
122, // '+'
|
||||
232, // ','
|
||||
202, // '-'
|
||||
215, // '.'
|
||||
224, // '/'
|
||||
208, // '0'
|
||||
220, // '1'
|
||||
204, // '2'
|
||||
187, // '3'
|
||||
183, // '4'
|
||||
179, // '5'
|
||||
177, // '6'
|
||||
168, // '7'
|
||||
178, // '8'
|
||||
200, // '9'
|
||||
226, // ':'
|
||||
195, // ';'
|
||||
154, // '<'
|
||||
184, // '='
|
||||
174, // '>'
|
||||
126, // '?'
|
||||
120, // '@'
|
||||
191, // 'A'
|
||||
157, // 'B'
|
||||
194, // 'C'
|
||||
170, // 'D'
|
||||
189, // 'E'
|
||||
162, // 'F'
|
||||
161, // 'G'
|
||||
150, // 'H'
|
||||
193, // 'I'
|
||||
142, // 'J'
|
||||
137, // 'K'
|
||||
171, // 'L'
|
||||
176, // 'M'
|
||||
185, // 'N'
|
||||
167, // 'O'
|
||||
186, // 'P'
|
||||
112, // 'Q'
|
||||
175, // 'R'
|
||||
192, // 'S'
|
||||
188, // 'T'
|
||||
156, // 'U'
|
||||
140, // 'V'
|
||||
143, // 'W'
|
||||
123, // 'X'
|
||||
133, // 'Y'
|
||||
128, // 'Z'
|
||||
147, // '['
|
||||
138, // '\\'
|
||||
146, // ']'
|
||||
114, // '^'
|
||||
223, // '_'
|
||||
151, // '`'
|
||||
249, // 'a'
|
||||
216, // 'b'
|
||||
238, // 'c'
|
||||
236, // 'd'
|
||||
253, // 'e'
|
||||
227, // 'f'
|
||||
218, // 'g'
|
||||
230, // 'h'
|
||||
247, // 'i'
|
||||
135, // 'j'
|
||||
180, // 'k'
|
||||
241, // 'l'
|
||||
233, // 'm'
|
||||
246, // 'n'
|
||||
244, // 'o'
|
||||
231, // 'p'
|
||||
139, // 'q'
|
||||
245, // 'r'
|
||||
243, // 's'
|
||||
251, // 't'
|
||||
235, // 'u'
|
||||
201, // 'v'
|
||||
196, // 'w'
|
||||
240, // 'x'
|
||||
214, // 'y'
|
||||
152, // 'z'
|
||||
182, // '{'
|
||||
205, // '|'
|
||||
181, // '}'
|
||||
127, // '~'
|
||||
27, // '\x7f'
|
||||
212, // '\x80'
|
||||
211, // '\x81'
|
||||
210, // '\x82'
|
||||
213, // '\x83'
|
||||
228, // '\x84'
|
||||
197, // '\x85'
|
||||
169, // '\x86'
|
||||
159, // '\x87'
|
||||
131, // '\x88'
|
||||
172, // '\x89'
|
||||
105, // '\x8a'
|
||||
80, // '\x8b'
|
||||
98, // '\x8c'
|
||||
96, // '\x8d'
|
||||
97, // '\x8e'
|
||||
81, // '\x8f'
|
||||
207, // '\x90'
|
||||
145, // '\x91'
|
||||
116, // '\x92'
|
||||
115, // '\x93'
|
||||
144, // '\x94'
|
||||
130, // '\x95'
|
||||
153, // '\x96'
|
||||
121, // '\x97'
|
||||
107, // '\x98'
|
||||
132, // '\x99'
|
||||
109, // '\x9a'
|
||||
110, // '\x9b'
|
||||
124, // '\x9c'
|
||||
111, // '\x9d'
|
||||
82, // '\x9e'
|
||||
108, // '\x9f'
|
||||
118, // '\xa0'
|
||||
141, // '¡'
|
||||
113, // '¢'
|
||||
129, // '£'
|
||||
119, // '¤'
|
||||
125, // '¥'
|
||||
165, // '¦'
|
||||
117, // '§'
|
||||
92, // '¨'
|
||||
106, // '©'
|
||||
83, // 'ª'
|
||||
72, // '«'
|
||||
99, // '¬'
|
||||
93, // '\xad'
|
||||
65, // '®'
|
||||
79, // '¯'
|
||||
166, // '°'
|
||||
237, // '±'
|
||||
163, // '²'
|
||||
199, // '³'
|
||||
190, // '´'
|
||||
225, // 'µ'
|
||||
209, // '¶'
|
||||
203, // '·'
|
||||
198, // '¸'
|
||||
217, // '¹'
|
||||
219, // 'º'
|
||||
206, // '»'
|
||||
234, // '¼'
|
||||
248, // '½'
|
||||
158, // '¾'
|
||||
239, // '¿'
|
||||
255, // 'À'
|
||||
255, // 'Á'
|
||||
255, // 'Â'
|
||||
255, // 'Ã'
|
||||
255, // 'Ä'
|
||||
255, // 'Å'
|
||||
255, // 'Æ'
|
||||
255, // 'Ç'
|
||||
255, // 'È'
|
||||
255, // 'É'
|
||||
255, // 'Ê'
|
||||
255, // 'Ë'
|
||||
255, // 'Ì'
|
||||
255, // 'Í'
|
||||
255, // 'Î'
|
||||
255, // 'Ï'
|
||||
255, // 'Ð'
|
||||
255, // 'Ñ'
|
||||
255, // 'Ò'
|
||||
255, // 'Ó'
|
||||
255, // 'Ô'
|
||||
255, // 'Õ'
|
||||
255, // 'Ö'
|
||||
255, // '×'
|
||||
255, // 'Ø'
|
||||
255, // 'Ù'
|
||||
255, // 'Ú'
|
||||
255, // 'Û'
|
||||
255, // 'Ü'
|
||||
255, // 'Ý'
|
||||
255, // 'Þ'
|
||||
255, // 'ß'
|
||||
255, // 'à'
|
||||
255, // 'á'
|
||||
255, // 'â'
|
||||
255, // 'ã'
|
||||
255, // 'ä'
|
||||
255, // 'å'
|
||||
255, // 'æ'
|
||||
255, // 'ç'
|
||||
255, // 'è'
|
||||
255, // 'é'
|
||||
255, // 'ê'
|
||||
255, // 'ë'
|
||||
255, // 'ì'
|
||||
255, // 'í'
|
||||
255, // 'î'
|
||||
255, // 'ï'
|
||||
255, // 'ð'
|
||||
255, // 'ñ'
|
||||
255, // 'ò'
|
||||
255, // 'ó'
|
||||
255, // 'ô'
|
||||
255, // 'õ'
|
||||
255, // 'ö'
|
||||
255, // '÷'
|
||||
255, // 'ø'
|
||||
255, // 'ù'
|
||||
255, // 'ú'
|
||||
255, // 'û'
|
||||
255, // 'ü'
|
||||
255, // 'ý'
|
||||
255, // 'þ'
|
||||
255, // 'ÿ'
|
||||
];
|
|
@ -0,0 +1,238 @@
|
|||
use std::fmt;
|
||||
|
||||
/// A representation of byte oriented equivalence classes.
///
/// This is used in an FSM to reduce the size of the transition table. This can
/// have a particularly large impact not only on the total size of an FSM, but
/// also on compile times.
#[derive(Clone, Copy)]
pub struct ByteClasses([u8; 256]);

impl ByteClasses {
    /// Creates a new set of equivalence classes where all bytes are mapped to
    /// the same class.
    pub fn empty() -> ByteClasses {
        ByteClasses([0; 256])
    }

    /// Creates a new set of equivalence classes where each byte belongs to
    /// its own equivalence class.
    pub fn singletons() -> ByteClasses {
        let mut classes = ByteClasses::empty();
        for i in 0..256 {
            classes.set(i as u8, i as u8);
        }
        classes
    }

    /// Set the equivalence class for the given byte.
    #[inline]
    pub fn set(&mut self, byte: u8, class: u8) {
        self.0[byte as usize] = class;
    }

    /// Get the equivalence class for the given byte.
    #[inline]
    pub fn get(&self, byte: u8) -> u8 {
        // A `u8` index is at most 255 and the array has exactly 256
        // elements, so this access is always in bounds and the compiler can
        // elide the bounds check. No `unsafe` is needed here.
        self.0[usize::from(byte)]
    }

    /// Return the total number of elements in the alphabet represented by
    /// these equivalence classes. Equivalently, this returns the total number
    /// of equivalence classes.
    #[inline]
    pub fn alphabet_len(&self) -> usize {
        self.0[255] as usize + 1
    }

    /// Returns true if and only if every byte in this class maps to its own
    /// equivalence class. Equivalently, there are 256 equivalence classes
    /// and each class contains exactly one byte.
    #[inline]
    pub fn is_singleton(&self) -> bool {
        self.alphabet_len() == 256
    }

    /// Returns an iterator over a sequence of representative bytes from each
    /// equivalence class. Namely, this yields exactly N items, where N is
    /// equivalent to the number of equivalence classes. Each item is an
    /// arbitrary byte drawn from each equivalence class.
    ///
    /// This is useful when one is determinizing an NFA and the NFA's alphabet
    /// hasn't been converted to equivalence classes yet. Picking an arbitrary
    /// byte from each equivalence class then permits a full exploration of
    /// the NFA instead of using every possible byte value.
    pub fn representatives(&self) -> ByteClassRepresentatives {
        ByteClassRepresentatives { classes: self, byte: 0, last_class: None }
    }

    /// Returns all of the bytes in the given equivalence class.
    ///
    /// The second element in the tuple indicates the number of elements in
    /// the array.
    fn elements(&self, equiv: u8) -> ([u8; 256], usize) {
        let (mut array, mut len) = ([0; 256], 0);
        for b in 0..256 {
            if self.get(b as u8) == equiv {
                array[len] = b as u8;
                len += 1;
            }
        }
        (array, len)
    }
}

impl fmt::Debug for ByteClasses {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.is_singleton() {
            write!(f, "ByteClasses({{singletons}})")
        } else {
            write!(f, "ByteClasses(")?;
            for equiv in 0..self.alphabet_len() {
                let (members, len) = self.elements(equiv as u8);
                write!(f, " {} => {:?}", equiv, &members[..len])?;
            }
            write!(f, ")")
        }
    }
}

/// An iterator over representative bytes from each equivalence class.
#[derive(Debug)]
pub struct ByteClassRepresentatives<'a> {
    /// The classes being iterated over.
    classes: &'a ByteClasses,
    /// The next byte value to inspect; runs from 0 through 255 and then
    /// stops (hence `usize`, so 256 is representable).
    byte: usize,
    /// The class of the most recently yielded byte, used to detect class
    /// boundaries.
    last_class: Option<u8>,
}

impl<'a> Iterator for ByteClassRepresentatives<'a> {
    type Item = u8;

    fn next(&mut self) -> Option<u8> {
        while self.byte < 256 {
            let byte = self.byte as u8;
            let class = self.classes.get(byte);
            self.byte += 1;

            // Yield exactly one byte (the first seen) per run of equal
            // classes.
            if self.last_class != Some(class) {
                self.last_class = Some(class);
                return Some(byte);
            }
        }
        None
    }
}
|
||||
|
||||
/// A byte class builder keeps track of an *approximation* of equivalence
|
||||
/// classes of bytes during NFA construction. That is, every byte in an
|
||||
/// equivalence class cannot discriminate between a match and a non-match.
|
||||
///
|
||||
/// For example, in the literals `abc` and `xyz`, the bytes [\x00-`], [d-w]
|
||||
/// and [{-\xFF] never discriminate between a match and a non-match, precisely
|
||||
/// because they never occur in the literals anywhere.
|
||||
///
|
||||
/// Note though that this does not necessarily compute the minimal set of
|
||||
/// equivalence classes. For example, in the literals above, the byte ranges
|
||||
/// [\x00-`], [d-w] and [{-\xFF] are all treated as distinct equivalence
|
||||
/// classes even though they could be treated a single class. The reason for
|
||||
/// this is implementation complexity. In the future, we should endeavor to
|
||||
/// compute the minimal equivalence classes since they can have a rather large
|
||||
/// impact on the size of the DFA.
|
||||
///
|
||||
/// The representation here is 256 booleans, all initially set to false. Each
|
||||
/// boolean maps to its corresponding byte based on position. A `true` value
|
||||
/// indicates the end of an equivalence class, where its corresponding byte
|
||||
/// and all of the bytes corresponding to all previous contiguous `false`
|
||||
/// values are in the same equivalence class.
|
||||
///
|
||||
/// This particular representation only permits contiguous ranges of bytes to
|
||||
/// be in the same equivalence class, which means that we can never discover
|
||||
/// the true minimal set of equivalence classes.
|
||||
#[derive(Debug)]
|
||||
pub struct ByteClassBuilder(Vec<bool>);
|
||||
|
||||
impl ByteClassBuilder {
|
||||
/// Create a new builder of byte classes where all bytes are part of the
|
||||
/// same equivalence class.
|
||||
pub fn new() -> ByteClassBuilder {
|
||||
ByteClassBuilder(vec![false; 256])
|
||||
}
|
||||
|
||||
/// Indicate the the range of byte given (inclusive) can discriminate a
|
||||
/// match between it and all other bytes outside of the range.
|
||||
pub fn set_range(&mut self, start: u8, end: u8) {
|
||||
debug_assert!(start <= end);
|
||||
if start > 0 {
|
||||
self.0[start as usize - 1] = true;
|
||||
}
|
||||
self.0[end as usize] = true;
|
||||
}
|
||||
|
||||
/// Build byte classes that map all byte values to their corresponding
|
||||
/// equivalence class. The last mapping indicates the largest equivalence
|
||||
/// class identifier (which is never bigger than 255).
|
||||
pub fn build(&self) -> ByteClasses {
|
||||
let mut classes = ByteClasses::empty();
|
||||
let mut class = 0u8;
|
||||
let mut i = 0;
|
||||
loop {
|
||||
classes.set(i as u8, class as u8);
|
||||
if i >= 255 {
|
||||
break;
|
||||
}
|
||||
if self.0[i] {
|
||||
class = class.checked_add(1).unwrap();
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
classes
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // A single range produces three classes: below, inside, and above it.
    #[test]
    fn byte_classes() {
        let mut set = ByteClassBuilder::new();
        set.set_range(b'a', b'z');

        let classes = set.build();
        assert_eq!(classes.get(0), 0);
        assert_eq!(classes.get(1), 0);
        assert_eq!(classes.get(2), 0);
        assert_eq!(classes.get(b'a' - 1), 0);
        assert_eq!(classes.get(b'a'), 1);
        assert_eq!(classes.get(b'm'), 1);
        assert_eq!(classes.get(b'z'), 1);
        assert_eq!(classes.get(b'z' + 1), 2);
        assert_eq!(classes.get(254), 2);
        assert_eq!(classes.get(255), 2);

        // Adjacent ranges with a one-byte gap: the gap byte gets its own
        // class.
        let mut set = ByteClassBuilder::new();
        set.set_range(0, 2);
        set.set_range(4, 6);
        let classes = set.build();
        assert_eq!(classes.get(0), 0);
        assert_eq!(classes.get(1), 0);
        assert_eq!(classes.get(2), 0);
        assert_eq!(classes.get(3), 1);
        assert_eq!(classes.get(4), 2);
        assert_eq!(classes.get(5), 2);
        assert_eq!(classes.get(6), 2);
        assert_eq!(classes.get(7), 3);
        assert_eq!(classes.get(255), 3);
    }

    // Marking every byte individually must yield the full 256-class
    // (singleton) alphabet.
    #[test]
    fn full_byte_classes() {
        let mut set = ByteClassBuilder::new();
        for i in 0..256u16 {
            set.set_range(i as u8, i as u8);
        }
        assert_eq!(set.build().alphabet_len(), 256);
    }
}
|
|
@ -0,0 +1,709 @@
|
|||
use std::mem::size_of;
|
||||
|
||||
use ahocorasick::MatchKind;
|
||||
use automaton::Automaton;
|
||||
use classes::ByteClasses;
|
||||
use error::Result;
|
||||
use nfa::{PatternID, PatternLength, NFA};
|
||||
use prefilter::{Prefilter, PrefilterObj, PrefilterState};
|
||||
use state_id::{dead_id, fail_id, premultiply_overflow_error, StateID};
|
||||
use Match;
|
||||
|
||||
/// A DFA over one of four transition-table representations.
///
/// Each variant trades construction effort for search-time cost in a
/// different way; all share the same `Repr` internals.
#[derive(Clone, Debug)]
pub enum DFA<S> {
    /// A DFA whose transition table is indexed directly by
    /// `state * 256 + byte`.
    Standard(Standard<S>),
    /// A DFA whose input bytes are first mapped through byte equivalence
    /// classes, shrinking each state's row to the alphabet length.
    ByteClass(ByteClass<S>),
    /// A DFA whose state identifiers are premultiplied by 256, so a
    /// transition is a single addition instead of a multiply-and-add.
    Premultiplied(Premultiplied<S>),
    /// A DFA combining premultiplied state identifiers with byte
    /// equivalence classes.
    PremultipliedByteClass(PremultipliedByteClass<S>),
}
|
||||
|
||||
impl<S: StateID> DFA<S> {
|
||||
fn repr(&self) -> &Repr<S> {
|
||||
match *self {
|
||||
DFA::Standard(ref dfa) => dfa.repr(),
|
||||
DFA::ByteClass(ref dfa) => dfa.repr(),
|
||||
DFA::Premultiplied(ref dfa) => dfa.repr(),
|
||||
DFA::PremultipliedByteClass(ref dfa) => dfa.repr(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn match_kind(&self) -> &MatchKind {
|
||||
&self.repr().match_kind
|
||||
}
|
||||
|
||||
pub fn heap_bytes(&self) -> usize {
|
||||
self.repr().heap_bytes
|
||||
}
|
||||
|
||||
pub fn max_pattern_len(&self) -> usize {
|
||||
self.repr().max_pattern_len
|
||||
}
|
||||
|
||||
pub fn pattern_count(&self) -> usize {
|
||||
self.repr().pattern_count
|
||||
}
|
||||
|
||||
pub fn start_state(&self) -> S {
|
||||
self.repr().start_id
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn overlapping_find_at(
|
||||
&self,
|
||||
prestate: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
state_id: &mut S,
|
||||
match_index: &mut usize,
|
||||
) -> Option<Match> {
|
||||
match *self {
|
||||
DFA::Standard(ref dfa) => dfa.overlapping_find_at(
|
||||
prestate,
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
match_index,
|
||||
),
|
||||
DFA::ByteClass(ref dfa) => dfa.overlapping_find_at(
|
||||
prestate,
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
match_index,
|
||||
),
|
||||
DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at(
|
||||
prestate,
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
match_index,
|
||||
),
|
||||
DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at(
|
||||
prestate,
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
match_index,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn earliest_find_at(
|
||||
&self,
|
||||
prestate: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
state_id: &mut S,
|
||||
) -> Option<Match> {
|
||||
match *self {
|
||||
DFA::Standard(ref dfa) => {
|
||||
dfa.earliest_find_at(prestate, haystack, at, state_id)
|
||||
}
|
||||
DFA::ByteClass(ref dfa) => {
|
||||
dfa.earliest_find_at(prestate, haystack, at, state_id)
|
||||
}
|
||||
DFA::Premultiplied(ref dfa) => {
|
||||
dfa.earliest_find_at(prestate, haystack, at, state_id)
|
||||
}
|
||||
DFA::PremultipliedByteClass(ref dfa) => {
|
||||
dfa.earliest_find_at(prestate, haystack, at, state_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_at_no_state(
|
||||
&self,
|
||||
prestate: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Option<Match> {
|
||||
match *self {
|
||||
DFA::Standard(ref dfa) => {
|
||||
dfa.find_at_no_state(prestate, haystack, at)
|
||||
}
|
||||
DFA::ByteClass(ref dfa) => {
|
||||
dfa.find_at_no_state(prestate, haystack, at)
|
||||
}
|
||||
DFA::Premultiplied(ref dfa) => {
|
||||
dfa.find_at_no_state(prestate, haystack, at)
|
||||
}
|
||||
DFA::PremultipliedByteClass(ref dfa) => {
|
||||
dfa.find_at_no_state(prestate, haystack, at)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Standard<S>(Repr<S>);
|
||||
|
||||
impl<S: StateID> Standard<S> {
|
||||
fn repr(&self) -> &Repr<S> {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: StateID> Automaton for Standard<S> {
|
||||
type ID = S;
|
||||
|
||||
fn match_kind(&self) -> &MatchKind {
|
||||
&self.repr().match_kind
|
||||
}
|
||||
|
||||
fn anchored(&self) -> bool {
|
||||
self.repr().anchored
|
||||
}
|
||||
|
||||
fn prefilter(&self) -> Option<&dyn Prefilter> {
|
||||
self.repr().prefilter.as_ref().map(|p| p.as_ref())
|
||||
}
|
||||
|
||||
fn start_state(&self) -> S {
|
||||
self.repr().start_id
|
||||
}
|
||||
|
||||
fn is_valid(&self, id: S) -> bool {
|
||||
id.to_usize() < self.repr().state_count
|
||||
}
|
||||
|
||||
fn is_match_state(&self, id: S) -> bool {
|
||||
self.repr().is_match_state(id)
|
||||
}
|
||||
|
||||
fn is_match_or_dead_state(&self, id: S) -> bool {
|
||||
self.repr().is_match_or_dead_state(id)
|
||||
}
|
||||
|
||||
fn get_match(
|
||||
&self,
|
||||
id: S,
|
||||
match_index: usize,
|
||||
end: usize,
|
||||
) -> Option<Match> {
|
||||
self.repr().get_match(id, match_index, end)
|
||||
}
|
||||
|
||||
fn match_count(&self, id: S) -> usize {
|
||||
self.repr().match_count(id)
|
||||
}
|
||||
|
||||
unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
|
||||
let o = current.to_usize() * 256 + input as usize;
|
||||
*self.repr().trans.get_unchecked(o)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ByteClass<S>(Repr<S>);
|
||||
|
||||
impl<S: StateID> ByteClass<S> {
|
||||
fn repr(&self) -> &Repr<S> {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: StateID> Automaton for ByteClass<S> {
|
||||
type ID = S;
|
||||
|
||||
fn match_kind(&self) -> &MatchKind {
|
||||
&self.repr().match_kind
|
||||
}
|
||||
|
||||
fn anchored(&self) -> bool {
|
||||
self.repr().anchored
|
||||
}
|
||||
|
||||
fn prefilter(&self) -> Option<&dyn Prefilter> {
|
||||
self.repr().prefilter.as_ref().map(|p| p.as_ref())
|
||||
}
|
||||
|
||||
fn start_state(&self) -> S {
|
||||
self.repr().start_id
|
||||
}
|
||||
|
||||
fn is_valid(&self, id: S) -> bool {
|
||||
id.to_usize() < self.repr().state_count
|
||||
}
|
||||
|
||||
fn is_match_state(&self, id: S) -> bool {
|
||||
self.repr().is_match_state(id)
|
||||
}
|
||||
|
||||
fn is_match_or_dead_state(&self, id: S) -> bool {
|
||||
self.repr().is_match_or_dead_state(id)
|
||||
}
|
||||
|
||||
fn get_match(
|
||||
&self,
|
||||
id: S,
|
||||
match_index: usize,
|
||||
end: usize,
|
||||
) -> Option<Match> {
|
||||
self.repr().get_match(id, match_index, end)
|
||||
}
|
||||
|
||||
fn match_count(&self, id: S) -> usize {
|
||||
self.repr().match_count(id)
|
||||
}
|
||||
|
||||
unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
|
||||
let alphabet_len = self.repr().byte_classes.alphabet_len();
|
||||
let input = self.repr().byte_classes.get(input);
|
||||
let o = current.to_usize() * alphabet_len + input as usize;
|
||||
*self.repr().trans.get_unchecked(o)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Premultiplied<S>(Repr<S>);
|
||||
|
||||
impl<S: StateID> Premultiplied<S> {
|
||||
fn repr(&self) -> &Repr<S> {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: StateID> Automaton for Premultiplied<S> {
|
||||
type ID = S;
|
||||
|
||||
fn match_kind(&self) -> &MatchKind {
|
||||
&self.repr().match_kind
|
||||
}
|
||||
|
||||
fn anchored(&self) -> bool {
|
||||
self.repr().anchored
|
||||
}
|
||||
|
||||
fn prefilter(&self) -> Option<&dyn Prefilter> {
|
||||
self.repr().prefilter.as_ref().map(|p| p.as_ref())
|
||||
}
|
||||
|
||||
fn start_state(&self) -> S {
|
||||
self.repr().start_id
|
||||
}
|
||||
|
||||
fn is_valid(&self, id: S) -> bool {
|
||||
(id.to_usize() / 256) < self.repr().state_count
|
||||
}
|
||||
|
||||
fn is_match_state(&self, id: S) -> bool {
|
||||
self.repr().is_match_state(id)
|
||||
}
|
||||
|
||||
fn is_match_or_dead_state(&self, id: S) -> bool {
|
||||
self.repr().is_match_or_dead_state(id)
|
||||
}
|
||||
|
||||
fn get_match(
|
||||
&self,
|
||||
id: S,
|
||||
match_index: usize,
|
||||
end: usize,
|
||||
) -> Option<Match> {
|
||||
if id > self.repr().max_match {
|
||||
return None;
|
||||
}
|
||||
self.repr()
|
||||
.matches
|
||||
.get(id.to_usize() / 256)
|
||||
.and_then(|m| m.get(match_index))
|
||||
.map(|&(id, len)| Match { pattern: id, len, end })
|
||||
}
|
||||
|
||||
fn match_count(&self, id: S) -> usize {
|
||||
let o = id.to_usize() / 256;
|
||||
self.repr().matches[o].len()
|
||||
}
|
||||
|
||||
unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
|
||||
let o = current.to_usize() + input as usize;
|
||||
*self.repr().trans.get_unchecked(o)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PremultipliedByteClass<S>(Repr<S>);
|
||||
|
||||
impl<S: StateID> PremultipliedByteClass<S> {
|
||||
fn repr(&self) -> &Repr<S> {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: StateID> Automaton for PremultipliedByteClass<S> {
|
||||
type ID = S;
|
||||
|
||||
fn match_kind(&self) -> &MatchKind {
|
||||
&self.repr().match_kind
|
||||
}
|
||||
|
||||
fn anchored(&self) -> bool {
|
||||
self.repr().anchored
|
||||
}
|
||||
|
||||
fn prefilter(&self) -> Option<&dyn Prefilter> {
|
||||
self.repr().prefilter.as_ref().map(|p| p.as_ref())
|
||||
}
|
||||
|
||||
fn start_state(&self) -> S {
|
||||
self.repr().start_id
|
||||
}
|
||||
|
||||
fn is_valid(&self, id: S) -> bool {
|
||||
(id.to_usize() / self.repr().alphabet_len()) < self.repr().state_count
|
||||
}
|
||||
|
||||
fn is_match_state(&self, id: S) -> bool {
|
||||
self.repr().is_match_state(id)
|
||||
}
|
||||
|
||||
fn is_match_or_dead_state(&self, id: S) -> bool {
|
||||
self.repr().is_match_or_dead_state(id)
|
||||
}
|
||||
|
||||
fn get_match(
|
||||
&self,
|
||||
id: S,
|
||||
match_index: usize,
|
||||
end: usize,
|
||||
) -> Option<Match> {
|
||||
if id > self.repr().max_match {
|
||||
return None;
|
||||
}
|
||||
self.repr()
|
||||
.matches
|
||||
.get(id.to_usize() / self.repr().alphabet_len())
|
||||
.and_then(|m| m.get(match_index))
|
||||
.map(|&(id, len)| Match { pattern: id, len, end })
|
||||
}
|
||||
|
||||
fn match_count(&self, id: S) -> usize {
|
||||
let o = id.to_usize() / self.repr().alphabet_len();
|
||||
self.repr().matches[o].len()
|
||||
}
|
||||
|
||||
unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
|
||||
let input = self.repr().byte_classes.get(input);
|
||||
let o = current.to_usize() + input as usize;
|
||||
*self.repr().trans.get_unchecked(o)
|
||||
}
|
||||
}
|
||||
|
||||
/// The internal representation shared by every `DFA` variant.
#[derive(Clone, Debug)]
pub struct Repr<S> {
    /// The match semantics this automaton was built with.
    match_kind: MatchKind,
    /// Whether searches are anchored, i.e., matches may only start at the
    /// beginning of the search.
    anchored: bool,
    /// Whether state identifiers in `trans` have been premultiplied by the
    /// alphabet size.
    premultiplied: bool,
    /// The identifier of the start state.
    start_id: S,
    /// The length, in bytes, of the longest pattern in this automaton. This
    /// information is useful for keeping correct buffer sizes when searching
    /// on streams.
    max_pattern_len: usize,
    /// The total number of patterns added to this automaton. This includes
    /// patterns that may never match.
    pattern_count: usize,
    /// The total number of states in this automaton.
    state_count: usize,
    /// The largest identifier of any match state. Identifiers at or below
    /// this (and above the dead state) are match states.
    max_match: S,
    /// The number of bytes of heap used by this DFA's transition table.
    heap_bytes: usize,
    /// A prefilter for quickly detecting candidate matches, if pertinent.
    prefilter: Option<PrefilterObj>,
    /// The mapping from bytes to equivalence classes. When byte classes are
    /// disabled, this maps every byte to itself.
    byte_classes: ByteClasses,
    /// The transition table, laid out row-per-state.
    trans: Vec<S>,
    /// For each state, the patterns (identifier and length) that match when
    /// that state is entered.
    matches: Vec<Vec<(PatternID, PatternLength)>>,
}
|
||||
|
||||
impl<S: StateID> Repr<S> {
|
||||
/// Returns the total alphabet size for this DFA.
|
||||
///
|
||||
/// If byte classes are enabled, then this corresponds to the number of
|
||||
/// equivalence classes. If they are disabled, then this is always 256.
|
||||
fn alphabet_len(&self) -> usize {
|
||||
self.byte_classes.alphabet_len()
|
||||
}
|
||||
|
||||
/// Returns true only if the given state is a match state.
|
||||
fn is_match_state(&self, id: S) -> bool {
|
||||
id <= self.max_match && id > dead_id()
|
||||
}
|
||||
|
||||
/// Returns true only if the given state is either a dead state or a match
|
||||
/// state.
|
||||
fn is_match_or_dead_state(&self, id: S) -> bool {
|
||||
id <= self.max_match
|
||||
}
|
||||
|
||||
/// Get the ith match for the given state, where the end position of a
|
||||
/// match was found at `end`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// The caller must ensure that the given state identifier is valid,
|
||||
/// otherwise this may panic. The `match_index` need not be valid. That is,
|
||||
/// if the given state has no matches then this returns `None`.
|
||||
fn get_match(
|
||||
&self,
|
||||
id: S,
|
||||
match_index: usize,
|
||||
end: usize,
|
||||
) -> Option<Match> {
|
||||
if id > self.max_match {
|
||||
return None;
|
||||
}
|
||||
self.matches
|
||||
.get(id.to_usize())
|
||||
.and_then(|m| m.get(match_index))
|
||||
.map(|&(id, len)| Match { pattern: id, len, end })
|
||||
}
|
||||
|
||||
/// Return the total number of matches for the given state.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// The caller must ensure that the given identifier is valid, or else
|
||||
/// this panics.
|
||||
fn match_count(&self, id: S) -> usize {
|
||||
self.matches[id.to_usize()].len()
|
||||
}
|
||||
|
||||
/// Get the next state given `from` as the current state and `byte` as the
|
||||
/// current input byte.
|
||||
fn next_state(&self, from: S, byte: u8) -> S {
|
||||
let alphabet_len = self.alphabet_len();
|
||||
let byte = self.byte_classes.get(byte);
|
||||
self.trans[from.to_usize() * alphabet_len + byte as usize]
|
||||
}
|
||||
|
||||
/// Set the `byte` transition for the `from` state to point to `to`.
|
||||
fn set_next_state(&mut self, from: S, byte: u8, to: S) {
|
||||
let alphabet_len = self.alphabet_len();
|
||||
let byte = self.byte_classes.get(byte);
|
||||
self.trans[from.to_usize() * alphabet_len + byte as usize] = to;
|
||||
}
|
||||
|
||||
/// Swap the given states in place.
|
||||
fn swap_states(&mut self, id1: S, id2: S) {
|
||||
assert!(!self.premultiplied, "can't swap states in premultiplied DFA");
|
||||
|
||||
let o1 = id1.to_usize() * self.alphabet_len();
|
||||
let o2 = id2.to_usize() * self.alphabet_len();
|
||||
for b in 0..self.alphabet_len() {
|
||||
self.trans.swap(o1 + b, o2 + b);
|
||||
}
|
||||
self.matches.swap(id1.to_usize(), id2.to_usize());
|
||||
}
|
||||
|
||||
/// This routine shuffles all match states in this DFA to the beginning
|
||||
/// of the DFA such that every non-match state appears after every match
|
||||
/// state. (With one exception: the special fail and dead states remain as
|
||||
/// the first two states.)
|
||||
///
|
||||
/// The purpose of doing this shuffling is to avoid an extra conditional
|
||||
/// in the search loop, and in particular, detecting whether a state is a
|
||||
/// match or not does not need to access any memory.
|
||||
///
|
||||
/// This updates `self.max_match` to point to the last matching state as
|
||||
/// well as `self.start` if the starting state was moved.
|
||||
fn shuffle_match_states(&mut self) {
|
||||
assert!(
|
||||
!self.premultiplied,
|
||||
"cannot shuffle match states of premultiplied DFA"
|
||||
);
|
||||
|
||||
if self.state_count <= 1 {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut first_non_match = self.start_id.to_usize();
|
||||
while first_non_match < self.state_count
|
||||
&& self.matches[first_non_match].len() > 0
|
||||
{
|
||||
first_non_match += 1;
|
||||
}
|
||||
|
||||
let mut swaps: Vec<S> = vec![fail_id(); self.state_count];
|
||||
let mut cur = self.state_count - 1;
|
||||
while cur > first_non_match {
|
||||
if self.matches[cur].len() > 0 {
|
||||
self.swap_states(
|
||||
S::from_usize(cur),
|
||||
S::from_usize(first_non_match),
|
||||
);
|
||||
swaps[cur] = S::from_usize(first_non_match);
|
||||
swaps[first_non_match] = S::from_usize(cur);
|
||||
|
||||
first_non_match += 1;
|
||||
while first_non_match < cur
|
||||
&& self.matches[first_non_match].len() > 0
|
||||
{
|
||||
first_non_match += 1;
|
||||
}
|
||||
}
|
||||
cur -= 1;
|
||||
}
|
||||
for id in (0..self.state_count).map(S::from_usize) {
|
||||
let alphabet_len = self.alphabet_len();
|
||||
let offset = id.to_usize() * alphabet_len;
|
||||
for next in &mut self.trans[offset..offset + alphabet_len] {
|
||||
if swaps[next.to_usize()] != fail_id() {
|
||||
*next = swaps[next.to_usize()];
|
||||
}
|
||||
}
|
||||
}
|
||||
if swaps[self.start_id.to_usize()] != fail_id() {
|
||||
self.start_id = swaps[self.start_id.to_usize()];
|
||||
}
|
||||
self.max_match = S::from_usize(first_non_match - 1);
|
||||
}
|
||||
|
||||
fn premultiply(&mut self) -> Result<()> {
|
||||
if self.premultiplied || self.state_count <= 1 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let alpha_len = self.alphabet_len();
|
||||
premultiply_overflow_error(
|
||||
S::from_usize(self.state_count - 1),
|
||||
alpha_len,
|
||||
)?;
|
||||
|
||||
for id in (2..self.state_count).map(S::from_usize) {
|
||||
let offset = id.to_usize() * alpha_len;
|
||||
for next in &mut self.trans[offset..offset + alpha_len] {
|
||||
if *next == dead_id() {
|
||||
continue;
|
||||
}
|
||||
*next = S::from_usize(next.to_usize() * alpha_len);
|
||||
}
|
||||
}
|
||||
self.premultiplied = true;
|
||||
self.start_id = S::from_usize(self.start_id.to_usize() * alpha_len);
|
||||
self.max_match = S::from_usize(self.max_match.to_usize() * alpha_len);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Computes the total amount of heap used by this NFA in bytes.
|
||||
fn calculate_size(&mut self) {
|
||||
let mut size = (self.trans.len() * size_of::<S>())
|
||||
+ (self.matches.len()
|
||||
* size_of::<Vec<(PatternID, PatternLength)>>());
|
||||
for state_matches in &self.matches {
|
||||
size +=
|
||||
state_matches.len() * size_of::<(PatternID, PatternLength)>();
|
||||
}
|
||||
size += self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes());
|
||||
self.heap_bytes = size;
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for configuring the determinization of an NFA into a DFA.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Builder {
|
||||
premultiply: bool,
|
||||
byte_classes: bool,
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
/// Create a new builder for a DFA.
|
||||
pub fn new() -> Builder {
|
||||
Builder { premultiply: true, byte_classes: true }
|
||||
}
|
||||
|
||||
/// Build a DFA from the given NFA.
|
||||
///
|
||||
/// This returns an error if the state identifiers exceed their
|
||||
/// representation size. This can only happen when state ids are
|
||||
/// premultiplied (which is enabled by default).
|
||||
pub fn build<S: StateID>(&self, nfa: &NFA<S>) -> Result<DFA<S>> {
|
||||
let byte_classes = if self.byte_classes {
|
||||
nfa.byte_classes().clone()
|
||||
} else {
|
||||
ByteClasses::singletons()
|
||||
};
|
||||
let alphabet_len = byte_classes.alphabet_len();
|
||||
let trans = vec![fail_id(); alphabet_len * nfa.state_len()];
|
||||
let matches = vec![vec![]; nfa.state_len()];
|
||||
let mut repr = Repr {
|
||||
match_kind: nfa.match_kind().clone(),
|
||||
anchored: nfa.anchored(),
|
||||
premultiplied: false,
|
||||
start_id: nfa.start_state(),
|
||||
max_pattern_len: nfa.max_pattern_len(),
|
||||
pattern_count: nfa.pattern_count(),
|
||||
state_count: nfa.state_len(),
|
||||
max_match: fail_id(),
|
||||
heap_bytes: 0,
|
||||
prefilter: nfa.prefilter_obj().map(|p| p.clone()),
|
||||
byte_classes: byte_classes.clone(),
|
||||
trans: trans,
|
||||
matches: matches,
|
||||
};
|
||||
for id in (0..nfa.state_len()).map(S::from_usize) {
|
||||
repr.matches[id.to_usize()].extend_from_slice(nfa.matches(id));
|
||||
|
||||
let fail = nfa.failure_transition(id);
|
||||
nfa.iter_all_transitions(&byte_classes, id, |b, mut next| {
|
||||
if next == fail_id() {
|
||||
next = nfa_next_state_memoized(nfa, &repr, id, fail, b);
|
||||
}
|
||||
repr.set_next_state(id, b, next);
|
||||
});
|
||||
}
|
||||
repr.shuffle_match_states();
|
||||
repr.calculate_size();
|
||||
if self.premultiply {
|
||||
repr.premultiply()?;
|
||||
if byte_classes.is_singleton() {
|
||||
Ok(DFA::Premultiplied(Premultiplied(repr)))
|
||||
} else {
|
||||
Ok(DFA::PremultipliedByteClass(PremultipliedByteClass(repr)))
|
||||
}
|
||||
} else {
|
||||
if byte_classes.is_singleton() {
|
||||
Ok(DFA::Standard(Standard(repr)))
|
||||
} else {
|
||||
Ok(DFA::ByteClass(ByteClass(repr)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether to use byte classes or in the DFA.
|
||||
pub fn byte_classes(&mut self, yes: bool) -> &mut Builder {
|
||||
self.byte_classes = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to premultiply state identifier in the DFA.
|
||||
pub fn premultiply(&mut self, yes: bool) -> &mut Builder {
|
||||
self.premultiply = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// This returns the next NFA transition (including resolving failure
|
||||
/// transitions), except once it sees a state id less than the id of the DFA
|
||||
/// state that is currently being populated, then we no longer need to follow
|
||||
/// failure transitions and can instead query the pre-computed state id from
|
||||
/// the DFA itself.
|
||||
///
|
||||
/// In general, this should only be called when a failure transition is seen.
|
||||
fn nfa_next_state_memoized<S: StateID>(
|
||||
nfa: &NFA<S>,
|
||||
dfa: &Repr<S>,
|
||||
populating: S,
|
||||
mut current: S,
|
||||
input: u8,
|
||||
) -> S {
|
||||
loop {
|
||||
if current < populating {
|
||||
return dfa.next_state(current, input);
|
||||
}
|
||||
let next = nfa.next_state(current, input);
|
||||
if next != fail_id() {
|
||||
return next;
|
||||
}
|
||||
current = nfa.failure_transition(current);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
use std::error;
|
||||
use std::fmt;
|
||||
use std::result;
|
||||
|
||||
pub type Result<T> = result::Result<T, Error>;
|
||||
|
||||
/// An error that occurred during the construction of an Aho-Corasick
|
||||
/// automaton.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Error {
|
||||
kind: ErrorKind,
|
||||
}
|
||||
|
||||
/// The kind of error that occurred.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum ErrorKind {
|
||||
/// An error that occurs when constructing an automaton would require the
|
||||
/// use of a state ID that overflows the chosen state ID representation.
|
||||
/// For example, if one is using `u8` for state IDs and builds a DFA with
|
||||
/// 257 states, then the last state's ID will be `256` which cannot be
|
||||
/// represented with `u8`.
|
||||
StateIDOverflow {
|
||||
/// The maximum possible state ID.
|
||||
max: usize,
|
||||
},
|
||||
/// An error that occurs when premultiplication of state IDs is requested
|
||||
/// when constructing an Aho-Corasick DFA, but doing so would overflow the
|
||||
/// chosen state ID representation.
|
||||
///
|
||||
/// When `max == requested_max`, then the state ID would overflow `usize`.
|
||||
PremultiplyOverflow {
|
||||
/// The maximum possible state id.
|
||||
max: usize,
|
||||
/// The maximum ID required by premultiplication.
|
||||
requested_max: usize,
|
||||
},
|
||||
}
|
||||
|
||||
impl Error {
|
||||
/// Return the kind of this error.
|
||||
pub fn kind(&self) -> &ErrorKind {
|
||||
&self.kind
|
||||
}
|
||||
|
||||
pub(crate) fn state_id_overflow(max: usize) -> Error {
|
||||
Error { kind: ErrorKind::StateIDOverflow { max } }
|
||||
}
|
||||
|
||||
pub(crate) fn premultiply_overflow(
|
||||
max: usize,
|
||||
requested_max: usize,
|
||||
) -> Error {
|
||||
Error { kind: ErrorKind::PremultiplyOverflow { max, requested_max } }
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for Error {
|
||||
fn description(&self) -> &str {
|
||||
match self.kind {
|
||||
ErrorKind::StateIDOverflow { .. } => {
|
||||
"state id representation too small"
|
||||
}
|
||||
ErrorKind::PremultiplyOverflow { .. } => {
|
||||
"state id representation too small for premultiplication"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.kind {
|
||||
ErrorKind::StateIDOverflow { max } => write!(
|
||||
f,
|
||||
"building the automaton failed because it required \
|
||||
building more states that can be identified, where the \
|
||||
maximum ID for the chosen representation is {}",
|
||||
max,
|
||||
),
|
||||
ErrorKind::PremultiplyOverflow { max, requested_max } => {
|
||||
if max == requested_max {
|
||||
write!(
|
||||
f,
|
||||
"premultiplication of states requires the ability to \
|
||||
represent a state ID greater than what can fit on \
|
||||
this platform's usize, which is {}",
|
||||
::std::usize::MAX,
|
||||
)
|
||||
} else {
|
||||
write!(
|
||||
f,
|
||||
"premultiplication of states requires the ability to \
|
||||
represent at least a state ID of {}, but the chosen \
|
||||
representation only permits a maximum state ID of {}",
|
||||
requested_max, max,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,297 @@
|
|||
/*!
|
||||
A library for finding occurrences of many patterns at once. This library
|
||||
provides multiple pattern search principally through an implementation of the
|
||||
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
|
||||
which builds a fast finite state machine for executing searches in linear time.
|
||||
|
||||
Additionally, this library provides a number of configuration options for
|
||||
building the automaton that permit controlling the space versus time trade
|
||||
off. Other features include simple ASCII case insensitive matching, finding
|
||||
overlapping matches, replacements, searching streams and even searching and
|
||||
replacing text in streams.
|
||||
|
||||
Finally, unlike all other (known) Aho-Corasick implementations, this one
|
||||
supports enabling
|
||||
[leftmost-first](enum.MatchKind.html#variant.LeftmostFirst)
|
||||
or
|
||||
[leftmost-longest](enum.MatchKind.html#variant.LeftmostFirst)
|
||||
match semantics, using a (seemingly) novel alternative construction algorithm.
|
||||
For more details on what match semantics means, see the
|
||||
[`MatchKind`](enum.MatchKind.html)
|
||||
type.
|
||||
|
||||
# Overview
|
||||
|
||||
This section gives a brief overview of the primary types in this crate:
|
||||
|
||||
* [`AhoCorasick`](struct.AhoCorasick.html) is the primary type and represents
|
||||
an Aho-Corasick automaton. This is the type you use to execute searches.
|
||||
* [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) can be used to build
|
||||
an Aho-Corasick automaton, and supports configuring a number of options.
|
||||
* [`Match`](struct.Match.html) represents a single match reported by an
|
||||
Aho-Corasick automaton. Each match has two pieces of information: the pattern
|
||||
that matched and the start and end byte offsets corresponding to the position
|
||||
in the haystack at which it matched.
|
||||
|
||||
Additionally, the [`packed`](packed/index.html) sub-module contains a lower
|
||||
level API for using fast vectorized routines for finding a small number of
|
||||
patterns in a haystack.
|
||||
|
||||
# Example: basic searching
|
||||
|
||||
This example shows how to search for occurrences of multiple patterns
|
||||
simultaneously. Each match includes the pattern that matched along with the
|
||||
byte offsets of the match.
|
||||
|
||||
```
|
||||
use aho_corasick::AhoCorasick;
|
||||
|
||||
let patterns = &["apple", "maple", "Snapple"];
|
||||
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||
|
||||
let ac = AhoCorasick::new(patterns);
|
||||
let mut matches = vec![];
|
||||
for mat in ac.find_iter(haystack) {
|
||||
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||
}
|
||||
assert_eq!(matches, vec![
|
||||
(1, 13, 18),
|
||||
(0, 28, 33),
|
||||
(2, 43, 50),
|
||||
]);
|
||||
```
|
||||
|
||||
# Example: case insensitivity
|
||||
|
||||
This is like the previous example, but matches `Snapple` case insensitively
|
||||
using `AhoCorasickBuilder`:
|
||||
|
||||
```
|
||||
use aho_corasick::AhoCorasickBuilder;
|
||||
|
||||
let patterns = &["apple", "maple", "snapple"];
|
||||
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||
|
||||
let ac = AhoCorasickBuilder::new()
|
||||
.ascii_case_insensitive(true)
|
||||
.build(patterns);
|
||||
let mut matches = vec![];
|
||||
for mat in ac.find_iter(haystack) {
|
||||
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||
}
|
||||
assert_eq!(matches, vec![
|
||||
(1, 13, 18),
|
||||
(0, 28, 33),
|
||||
(2, 43, 50),
|
||||
]);
|
||||
```
|
||||
|
||||
# Example: replacing matches in a stream
|
||||
|
||||
This example shows how to execute a search and replace on a stream without
|
||||
loading the entire stream into memory first.
|
||||
|
||||
```
|
||||
use aho_corasick::AhoCorasick;
|
||||
|
||||
# fn example() -> Result<(), ::std::io::Error> {
|
||||
let patterns = &["fox", "brown", "quick"];
|
||||
let replace_with = &["sloth", "grey", "slow"];
|
||||
|
||||
// In a real example, these might be `std::fs::File`s instead. All you need to
|
||||
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
|
||||
let rdr = "The quick brown fox.";
|
||||
let mut wtr = vec![];
|
||||
|
||||
let ac = AhoCorasick::new(patterns);
|
||||
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
|
||||
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
|
||||
# Ok(()) }; example().unwrap()
|
||||
```
|
||||
|
||||
# Example: finding the leftmost first match
|
||||
|
||||
In the textbook description of Aho-Corasick, its formulation is typically
|
||||
structured such that it reports all possible matches, even when they overlap
|
||||
with another. In many cases, overlapping matches may not be desired, such as
|
||||
the case of finding all successive non-overlapping matches like you might with
|
||||
a standard regular expression.
|
||||
|
||||
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
|
||||
this doesn't always work in the expected way, since it will report matches as
|
||||
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
|
||||
against the text `Samwise`. Most regex engines (that are Perl-like, or
|
||||
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
|
||||
algorithm modified for reporting non-overlapping matches will report `Sam`.
|
||||
|
||||
A novel contribution of this library is the ability to change the match
|
||||
semantics of Aho-Corasick (without additional search time overhead) such that
|
||||
`Samwise` is reported instead. For example, here's the standard approach:
|
||||
|
||||
```
|
||||
use aho_corasick::AhoCorasick;
|
||||
|
||||
let patterns = &["Samwise", "Sam"];
|
||||
let haystack = "Samwise";
|
||||
|
||||
let ac = AhoCorasick::new(patterns);
|
||||
let mat = ac.find(haystack).expect("should have a match");
|
||||
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
|
||||
```
|
||||
|
||||
And now here's the leftmost-first version, which matches how a Perl-like
|
||||
regex will work:
|
||||
|
||||
```
|
||||
use aho_corasick::{AhoCorasickBuilder, MatchKind};
|
||||
|
||||
let patterns = &["Samwise", "Sam"];
|
||||
let haystack = "Samwise";
|
||||
|
||||
let ac = AhoCorasickBuilder::new()
|
||||
.match_kind(MatchKind::LeftmostFirst)
|
||||
.build(patterns);
|
||||
let mat = ac.find(haystack).expect("should have a match");
|
||||
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
|
||||
```
|
||||
|
||||
In addition to leftmost-first semantics, this library also supports
|
||||
leftmost-longest semantics, which match the POSIX behavior of a regular
|
||||
expression alternation. See
|
||||
[`MatchKind`](enum.MatchKind.html)
|
||||
for more details.
|
||||
|
||||
# Prefilters
|
||||
|
||||
While an Aho-Corasick automaton can perform admirably when compared to more
|
||||
naive solutions, it is generally slower than more specialized algorithms that
|
||||
are accelerated using vector instructions such as SIMD.
|
||||
|
||||
For that reason, this library will internally use a "prefilter" to attempt
|
||||
to accelerate searches when possible. Currently, this library has fairly
|
||||
limited implementation that only applies when there are 3 or fewer unique
|
||||
starting bytes among all patterns in an automaton.
|
||||
|
||||
While a prefilter is generally good to have on by default since it works well
|
||||
in the common case, it can lead to less predictable or even sub-optimal
|
||||
performance in some cases. For that reason, prefilters can be disabled via
|
||||
[`AhoCorasickBuilder::prefilter`](struct.AhoCorasickBuilder.html#method.prefilter).
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
// We can never be truly no_std, but we could be alloc-only some day, so
|
||||
// require the std feature for now.
|
||||
#[cfg(not(feature = "std"))]
|
||||
compile_error!("`std` feature is currently required to build this crate");
|
||||
|
||||
extern crate memchr;
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate doc_comment;
|
||||
|
||||
#[cfg(test)]
|
||||
doctest!("../README.md");
|
||||
|
||||
pub use ahocorasick::{
|
||||
AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind,
|
||||
StreamFindIter,
|
||||
};
|
||||
pub use error::{Error, ErrorKind};
|
||||
pub use state_id::StateID;
|
||||
|
||||
mod ahocorasick;
|
||||
mod automaton;
|
||||
mod buffer;
|
||||
mod byte_frequencies;
|
||||
mod classes;
|
||||
mod dfa;
|
||||
mod error;
|
||||
mod nfa;
|
||||
pub mod packed;
|
||||
mod prefilter;
|
||||
mod state_id;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// A representation of a match reported by an Aho-Corasick automaton.
|
||||
///
|
||||
/// A match has two essential pieces of information: the identifier of the
|
||||
/// pattern that matched, along with the start and end offsets of the match
|
||||
/// in the haystack.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// Basic usage:
|
||||
///
|
||||
/// ```
|
||||
/// use aho_corasick::AhoCorasick;
|
||||
///
|
||||
/// let ac = AhoCorasick::new(&[
|
||||
/// "foo", "bar", "baz",
|
||||
/// ]);
|
||||
/// let mat = ac.find("xxx bar xxx").expect("should have a match");
|
||||
/// assert_eq!(1, mat.pattern());
|
||||
/// assert_eq!(4, mat.start());
|
||||
/// assert_eq!(7, mat.end());
|
||||
/// ```
|
||||
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
|
||||
pub struct Match {
|
||||
/// The pattern id.
|
||||
pattern: usize,
|
||||
/// The length of this match, such that the starting position of the match
|
||||
/// is `end - len`.
|
||||
///
|
||||
/// We use length here because, other than the pattern id, the only
|
||||
/// information about each pattern that the automaton stores is its length.
|
||||
/// So using the length here is just a bit more natural. But it isn't
|
||||
/// technically required.
|
||||
len: usize,
|
||||
/// The end offset of the match, exclusive.
|
||||
end: usize,
|
||||
}
|
||||
|
||||
impl Match {
|
||||
/// Returns the identifier of the pattern that matched.
|
||||
///
|
||||
/// The identifier of a pattern is derived from the position in which it
|
||||
/// was originally inserted into the corresponding automaton. The first
|
||||
/// pattern has identifier `0`, and each subsequent pattern is `1`, `2`
|
||||
/// and so on.
|
||||
#[inline]
|
||||
pub fn pattern(&self) -> usize {
|
||||
self.pattern
|
||||
}
|
||||
|
||||
/// The starting position of the match.
|
||||
#[inline]
|
||||
pub fn start(&self) -> usize {
|
||||
self.end - self.len
|
||||
}
|
||||
|
||||
/// The ending position of the match.
|
||||
#[inline]
|
||||
pub fn end(&self) -> usize {
|
||||
self.end
|
||||
}
|
||||
|
||||
/// Returns true if and only if this match is empty. That is, when
|
||||
/// `start() == end()`.
|
||||
///
|
||||
/// An empty match can only be returned when the empty string was among
|
||||
/// the patterns used to build the Aho-Corasick automaton.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len == 0
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn increment(&self, by: usize) -> Match {
|
||||
Match { pattern: self.pattern, len: self.len, end: self.end + by }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn from_span(id: usize, start: usize, end: usize) -> Match {
|
||||
Match { pattern: id, len: end - start, end: end }
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,632 @@
|
|||
use std::u16;
|
||||
|
||||
use packed::pattern::Patterns;
|
||||
use packed::rabinkarp::RabinKarp;
|
||||
use packed::teddy::{self, Teddy};
|
||||
use Match;
|
||||
|
||||
/// This is a limit placed on the total number of patterns we're willing to try
|
||||
/// and match at once. As more sophisticated algorithms are added, this number
|
||||
/// may be increased.
|
||||
const PATTERN_LIMIT: usize = 128;
|
||||
|
||||
/// A knob for controlling the match semantics of a packed multiple string
|
||||
/// searcher.
|
||||
///
|
||||
/// This differs from the
|
||||
/// [`MatchKind`](../enum.MatchKind.html)
|
||||
/// type in the top-level crate module in that it doesn't support
|
||||
/// "standard" match semantics, and instead only supports leftmost-first or
|
||||
/// leftmost-longest. Namely, "standard" semantics cannot be easily supported
|
||||
/// by packed searchers.
|
||||
///
|
||||
/// For more information on the distinction between leftmost-first and
|
||||
/// leftmost-longest, see the docs on the top-level `MatchKind` type.
|
||||
///
|
||||
/// Unlike the top-level `MatchKind` type, the default match semantics for this
|
||||
/// type are leftmost-first.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum MatchKind {
|
||||
/// Use leftmost-first match semantics, which reports leftmost matches.
|
||||
/// When there are multiple possible leftmost matches, the match
|
||||
/// corresponding to the pattern that appeared earlier when constructing
|
||||
/// the automaton is reported.
|
||||
///
|
||||
/// This is the default.
|
||||
LeftmostFirst,
|
||||
/// Use leftmost-longest match semantics, which reports leftmost matches.
|
||||
/// When there are multiple possible leftmost matches, the longest match
|
||||
/// is chosen.
|
||||
LeftmostLongest,
|
||||
/// Hints that destructuring should not be exhaustive.
|
||||
///
|
||||
/// This enum may grow additional variants, so this makes sure clients
|
||||
/// don't count on exhaustive matching. (Otherwise, adding a new variant
|
||||
/// could break existing code.)
|
||||
#[doc(hidden)]
|
||||
__Nonexhaustive,
|
||||
}
|
||||
|
||||
impl Default for MatchKind {
|
||||
fn default() -> MatchKind {
|
||||
MatchKind::LeftmostFirst
|
||||
}
|
||||
}
|
||||
|
||||
/// The configuration for a packed multiple pattern searcher.
|
||||
///
|
||||
/// The configuration is currently limited only to being able to select the
|
||||
/// match semantics (leftmost-first or leftmost-longest) of a searcher. In the
|
||||
/// future, more knobs may be made available.
|
||||
///
|
||||
/// A configuration produces a [`packed::Builder`](struct.Builder.html), which
|
||||
/// in turn can be used to construct a
|
||||
/// [`packed::Searcher`](struct.Searcher.html) for searching.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This example shows how to use leftmost-longest semantics instead of the
|
||||
/// default (leftmost-first).
|
||||
///
|
||||
/// ```
|
||||
/// use aho_corasick::packed::{Config, MatchKind};
|
||||
///
|
||||
/// # fn example() -> Option<()> {
|
||||
/// let searcher = Config::new()
|
||||
/// .match_kind(MatchKind::LeftmostLongest)
|
||||
/// .builder()
|
||||
/// .add("foo")
|
||||
/// .add("foobar")
|
||||
/// .build()?;
|
||||
/// let matches: Vec<usize> = searcher
|
||||
/// .find_iter("foobar")
|
||||
/// .map(|mat| mat.pattern())
|
||||
/// .collect();
|
||||
/// assert_eq!(vec![1], matches);
|
||||
/// # Some(()) }
|
||||
/// # if cfg!(target_arch = "x86_64") {
|
||||
/// # example().unwrap()
|
||||
/// # } else {
|
||||
/// # assert!(example().is_none());
|
||||
/// # }
|
||||
/// ```
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Config {
|
||||
kind: MatchKind,
|
||||
force: Option<ForceAlgorithm>,
|
||||
force_teddy_fat: Option<bool>,
|
||||
force_avx: Option<bool>,
|
||||
}
|
||||
|
||||
/// An internal option for forcing the use of a particular packed algorithm.
|
||||
///
|
||||
/// When an algorithm is forced, if a searcher could not be constructed for it,
|
||||
/// then no searcher will be returned even if an alternative algorithm would
|
||||
/// work.
|
||||
#[derive(Clone, Debug)]
|
||||
enum ForceAlgorithm {
|
||||
Teddy,
|
||||
RabinKarp,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
fn default() -> Config {
|
||||
Config::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Create a new default configuration. A default configuration uses
|
||||
/// leftmost-first match semantics.
|
||||
pub fn new() -> Config {
|
||||
Config {
|
||||
kind: MatchKind::LeftmostFirst,
|
||||
force: None,
|
||||
force_teddy_fat: None,
|
||||
force_avx: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a packed builder from this configuration. The builder can be
|
||||
/// used to accumulate patterns and create a
|
||||
/// [`Searcher`](struct.Searcher.html)
|
||||
/// from them.
|
||||
pub fn builder(&self) -> Builder {
|
||||
Builder::from_config(self.clone())
|
||||
}
|
||||
|
||||
/// Set the match semantics for this configuration.
|
||||
pub fn match_kind(&mut self, kind: MatchKind) -> &mut Config {
|
||||
self.kind = kind;
|
||||
self
|
||||
}
|
||||
|
||||
/// An undocumented method for forcing the use of the Teddy algorithm.
|
||||
///
|
||||
/// This is only exposed for more precise testing and benchmarks. Callers
|
||||
/// should not use it as it is not part of the API stability guarantees of
|
||||
/// this crate.
|
||||
#[doc(hidden)]
|
||||
pub fn force_teddy(&mut self, yes: bool) -> &mut Config {
|
||||
if yes {
|
||||
self.force = Some(ForceAlgorithm::Teddy);
|
||||
} else {
|
||||
self.force = None;
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// An undocumented method for forcing the use of the Fat Teddy algorithm.
|
||||
///
|
||||
/// This is only exposed for more precise testing and benchmarks. Callers
|
||||
/// should not use it as it is not part of the API stability guarantees of
|
||||
/// this crate.
|
||||
#[doc(hidden)]
|
||||
pub fn force_teddy_fat(&mut self, yes: Option<bool>) -> &mut Config {
|
||||
self.force_teddy_fat = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// An undocumented method for forcing the use of SSE (`Some(false)`) or
|
||||
/// AVX (`Some(true)`) algorithms.
|
||||
///
|
||||
/// This is only exposed for more precise testing and benchmarks. Callers
|
||||
/// should not use it as it is not part of the API stability guarantees of
|
||||
/// this crate.
|
||||
#[doc(hidden)]
|
||||
pub fn force_avx(&mut self, yes: Option<bool>) -> &mut Config {
|
||||
self.force_avx = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// An undocumented method for forcing the use of the Rabin-Karp algorithm.
|
||||
///
|
||||
/// This is only exposed for more precise testing and benchmarks. Callers
|
||||
/// should not use it as it is not part of the API stability guarantees of
|
||||
/// this crate.
|
||||
#[doc(hidden)]
|
||||
pub fn force_rabin_karp(&mut self, yes: bool) -> &mut Config {
|
||||
if yes {
|
||||
self.force = Some(ForceAlgorithm::RabinKarp);
|
||||
} else {
|
||||
self.force = None;
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for constructing a packed searcher from a collection of patterns.
///
/// # Example
///
/// This example shows how to use a builder to construct a searcher. By
/// default, leftmost-first match semantics are used.
///
/// ```
/// use aho_corasick::packed::{Builder, MatchKind};
///
/// # fn example() -> Option<()> {
/// let searcher = Builder::new()
///     .add("foobar")
///     .add("foo")
///     .build()?;
/// let matches: Vec<usize> = searcher
///     .find_iter("foobar")
///     .map(|mat| mat.pattern())
///     .collect();
/// assert_eq!(vec![0], matches);
/// # Some(()) }
/// # if cfg!(target_arch = "x86_64") {
/// #     example().unwrap()
/// # } else {
/// #     assert!(example().is_none());
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct Builder {
    /// The configuration of this builder and subsequent matcher.
    config: Config,
    /// Set to true if the builder detects that a matcher cannot be built
    /// (e.g., too many patterns, or an empty pattern, was added). Once
    /// inert, `build` always returns `None`.
    inert: bool,
    /// The patterns provided by the caller.
    patterns: Patterns,
}
|
||||
|
||||
impl Builder {
|
||||
/// Create a new builder for constructing a multi-pattern searcher. This
|
||||
/// constructor uses the default configuration.
|
||||
pub fn new() -> Builder {
|
||||
Builder::from_config(Config::new())
|
||||
}
|
||||
|
||||
fn from_config(config: Config) -> Builder {
|
||||
Builder { config, inert: false, patterns: Patterns::new() }
|
||||
}
|
||||
|
||||
/// Build a searcher from the patterns added to this builder so far.
|
||||
pub fn build(&self) -> Option<Searcher> {
|
||||
if self.inert || self.patterns.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let mut patterns = self.patterns.clone();
|
||||
patterns.set_match_kind(self.config.kind);
|
||||
let rabinkarp = RabinKarp::new(&patterns);
|
||||
// Effectively, we only want to return a searcher if we can use Teddy,
|
||||
// since Teddy is our only fast packed searcher at the moment.
|
||||
// Rabin-Karp is only used when searching haystacks smaller than what
|
||||
// Teddy can support. Thus, the only way to get a Rabin-Karp searcher
|
||||
// is to force it using undocumented APIs (for tests/benchmarks).
|
||||
let (search_kind, minimum_len) = match self.config.force {
|
||||
None | Some(ForceAlgorithm::Teddy) => {
|
||||
let teddy = match self.build_teddy(&patterns) {
|
||||
None => return None,
|
||||
Some(teddy) => teddy,
|
||||
};
|
||||
let minimum_len = teddy.minimum_len();
|
||||
(SearchKind::Teddy(teddy), minimum_len)
|
||||
}
|
||||
Some(ForceAlgorithm::RabinKarp) => (SearchKind::RabinKarp, 0),
|
||||
};
|
||||
Some(Searcher {
|
||||
config: self.config.clone(),
|
||||
patterns: patterns,
|
||||
rabinkarp: rabinkarp,
|
||||
search_kind,
|
||||
minimum_len,
|
||||
})
|
||||
}
|
||||
|
||||
fn build_teddy(&self, patterns: &Patterns) -> Option<Teddy> {
|
||||
teddy::Builder::new()
|
||||
.avx(self.config.force_avx)
|
||||
.fat(self.config.force_teddy_fat)
|
||||
.build(&patterns)
|
||||
}
|
||||
|
||||
/// Add the given pattern to this set to match.
|
||||
///
|
||||
/// The order in which patterns are added is significant. Namely, when
|
||||
/// using leftmost-first match semantics, then when multiple patterns can
|
||||
/// match at a particular location, the pattern that was added first is
|
||||
/// used as the match.
|
||||
///
|
||||
/// If the number of patterns added exceeds the amount supported by packed
|
||||
/// searchers, then the builder will stop accumulating patterns and render
|
||||
/// itself inert. At this point, constructing a searcher will always return
|
||||
/// `None`.
|
||||
pub fn add<P: AsRef<[u8]>>(&mut self, pattern: P) -> &mut Builder {
|
||||
if self.inert {
|
||||
return self;
|
||||
} else if self.patterns.len() >= PATTERN_LIMIT {
|
||||
self.inert = true;
|
||||
self.patterns.reset();
|
||||
return self;
|
||||
}
|
||||
// Just in case PATTERN_LIMIT increases beyond u16::MAX.
|
||||
assert!(self.patterns.len() <= u16::MAX as usize);
|
||||
|
||||
let pattern = pattern.as_ref();
|
||||
if pattern.is_empty() {
|
||||
self.inert = true;
|
||||
self.patterns.reset();
|
||||
return self;
|
||||
}
|
||||
self.patterns.add(pattern);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add the given iterator of patterns to this set to match.
|
||||
///
|
||||
/// The iterator must yield elements that can be converted into a `&[u8]`.
|
||||
///
|
||||
/// The order in which patterns are added is significant. Namely, when
|
||||
/// using leftmost-first match semantics, then when multiple patterns can
|
||||
/// match at a particular location, the pattern that was added first is
|
||||
/// used as the match.
|
||||
///
|
||||
/// If the number of patterns added exceeds the amount supported by packed
|
||||
/// searchers, then the builder will stop accumulating patterns and render
|
||||
/// itself inert. At this point, constructing a searcher will always return
|
||||
/// `None`.
|
||||
pub fn extend<I, P>(&mut self, patterns: I) -> &mut Builder
|
||||
where
|
||||
I: IntoIterator<Item = P>,
|
||||
P: AsRef<[u8]>,
|
||||
{
|
||||
for p in patterns {
|
||||
self.add(p);
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Builder {
|
||||
fn default() -> Builder {
|
||||
Builder::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// A packed searcher for quickly finding occurrences of multiple patterns.
///
/// If callers need more flexible construction, or if one wants to change the
/// match semantics (either leftmost-first or leftmost-longest), then one can
/// use the [`Config`](struct.Config.html) and/or
/// [`Builder`](struct.Builder.html) types for more fine grained control.
///
/// # Example
///
/// This example shows how to create a searcher from an iterator of patterns.
/// By default, leftmost-first match semantics are used.
///
/// ```
/// use aho_corasick::packed::{MatchKind, Searcher};
///
/// # fn example() -> Option<()> {
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
/// let matches: Vec<usize> = searcher
///     .find_iter("foobar")
///     .map(|mat| mat.pattern())
///     .collect();
/// assert_eq!(vec![0], matches);
/// # Some(()) }
/// # if cfg!(target_arch = "x86_64") {
/// #     example().unwrap()
/// # } else {
/// #     assert!(example().is_none());
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct Searcher {
    /// The configuration this searcher was built with.
    config: Config,
    /// The patterns being searched for, in match-semantics order.
    patterns: Patterns,
    /// A fallback (non-packed) searcher, used for haystacks too short for
    /// the primary algorithm or when forced via undocumented config APIs.
    rabinkarp: RabinKarp,
    /// The primary search algorithm selected at build time.
    search_kind: SearchKind,
    /// The minimum haystack length required for the primary algorithm;
    /// shorter haystacks fall back to Rabin-Karp.
    minimum_len: usize,
}
|
||||
|
||||
/// The search algorithm selected (at build time) to execute searches.
#[derive(Clone, Debug)]
enum SearchKind {
    /// The vectorized Teddy algorithm, which carries its own state.
    Teddy(Teddy),
    /// The Rabin-Karp fallback; its state lives on `Searcher` itself.
    RabinKarp,
}
|
||||
|
||||
impl Searcher {
|
||||
/// A convenience function for constructing a searcher from an iterator
|
||||
/// of things that can be converted to a `&[u8]`.
|
||||
///
|
||||
/// If a searcher could not be constructed (either because of an
|
||||
/// unsupported CPU or because there are too many patterns), then `None`
|
||||
/// is returned.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// Basic usage:
|
||||
///
|
||||
/// ```
|
||||
/// use aho_corasick::packed::{MatchKind, Searcher};
|
||||
///
|
||||
/// # fn example() -> Option<()> {
|
||||
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||
/// let matches: Vec<usize> = searcher
|
||||
/// .find_iter("foobar")
|
||||
/// .map(|mat| mat.pattern())
|
||||
/// .collect();
|
||||
/// assert_eq!(vec![0], matches);
|
||||
/// # Some(()) }
|
||||
/// # if cfg!(target_arch = "x86_64") {
|
||||
/// # example().unwrap()
|
||||
/// # } else {
|
||||
/// # assert!(example().is_none());
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn new<I, P>(patterns: I) -> Option<Searcher>
|
||||
where
|
||||
I: IntoIterator<Item = P>,
|
||||
P: AsRef<[u8]>,
|
||||
{
|
||||
Builder::new().extend(patterns).build()
|
||||
}
|
||||
|
||||
/// Return the first occurrence of any of the patterns in this searcher,
|
||||
/// according to its match semantics, in the given haystack. The `Match`
|
||||
/// returned will include the identifier of the pattern that matched, which
|
||||
/// corresponds to the index of the pattern (starting from `0`) in which it
|
||||
/// was added.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// Basic usage:
|
||||
///
|
||||
/// ```
|
||||
/// use aho_corasick::packed::{MatchKind, Searcher};
|
||||
///
|
||||
/// # fn example() -> Option<()> {
|
||||
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||
/// let mat = searcher.find("foobar")?;
|
||||
/// assert_eq!(0, mat.pattern());
|
||||
/// assert_eq!(0, mat.start());
|
||||
/// assert_eq!(6, mat.end());
|
||||
/// # Some(()) }
|
||||
/// # if cfg!(target_arch = "x86_64") {
|
||||
/// # example().unwrap()
|
||||
/// # } else {
|
||||
/// # assert!(example().is_none());
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<Match> {
|
||||
self.find_at(haystack, 0)
|
||||
}
|
||||
|
||||
/// Return the first occurrence of any of the patterns in this searcher,
|
||||
/// according to its match semantics, in the given haystack starting from
|
||||
/// the given position.
|
||||
///
|
||||
/// The `Match` returned will include the identifier of the pattern that
|
||||
/// matched, which corresponds to the index of the pattern (starting from
|
||||
/// `0`) in which it was added. The offsets in the `Match` will be relative
|
||||
/// to the start of `haystack` (and not `at`).
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// Basic usage:
|
||||
///
|
||||
/// ```
|
||||
/// use aho_corasick::packed::{MatchKind, Searcher};
|
||||
///
|
||||
/// # fn example() -> Option<()> {
|
||||
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||
/// let mat = searcher.find_at("foofoobar", 3)?;
|
||||
/// assert_eq!(0, mat.pattern());
|
||||
/// assert_eq!(3, mat.start());
|
||||
/// assert_eq!(9, mat.end());
|
||||
/// # Some(()) }
|
||||
/// # if cfg!(target_arch = "x86_64") {
|
||||
/// # example().unwrap()
|
||||
/// # } else {
|
||||
/// # assert!(example().is_none());
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn find_at<B: AsRef<[u8]>>(
|
||||
&self,
|
||||
haystack: B,
|
||||
at: usize,
|
||||
) -> Option<Match> {
|
||||
let haystack = haystack.as_ref();
|
||||
match self.search_kind {
|
||||
SearchKind::Teddy(ref teddy) => {
|
||||
if haystack[at..].len() < teddy.minimum_len() {
|
||||
return self.slow_at(haystack, at);
|
||||
}
|
||||
teddy.find_at(&self.patterns, haystack, at)
|
||||
}
|
||||
SearchKind::RabinKarp => {
|
||||
self.rabinkarp.find_at(&self.patterns, haystack, at)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an iterator of non-overlapping occurrences of the patterns in
|
||||
/// this searcher, according to its match semantics, in the given haystack.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// Basic usage:
|
||||
///
|
||||
/// ```
|
||||
/// use aho_corasick::packed::{MatchKind, Searcher};
|
||||
///
|
||||
/// # fn example() -> Option<()> {
|
||||
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||
/// let matches: Vec<usize> = searcher
|
||||
/// .find_iter("foobar fooba foofoo")
|
||||
/// .map(|mat| mat.pattern())
|
||||
/// .collect();
|
||||
/// assert_eq!(vec![0, 1, 1, 1], matches);
|
||||
/// # Some(()) }
|
||||
/// # if cfg!(target_arch = "x86_64") {
|
||||
/// # example().unwrap()
|
||||
/// # } else {
|
||||
/// # assert!(example().is_none());
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn find_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>(
|
||||
&'a self,
|
||||
haystack: &'b B,
|
||||
) -> FindIter<'a, 'b> {
|
||||
FindIter { searcher: self, haystack: haystack.as_ref(), at: 0 }
|
||||
}
|
||||
|
||||
/// Returns the match kind used by this packed searcher.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// Basic usage:
|
||||
///
|
||||
/// ```
|
||||
/// use aho_corasick::packed::{MatchKind, Searcher};
|
||||
///
|
||||
/// # fn example() -> Option<()> {
|
||||
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||
/// // leftmost-first is the default.
|
||||
/// assert_eq!(&MatchKind::LeftmostFirst, searcher.match_kind());
|
||||
/// # Some(()) }
|
||||
/// # if cfg!(target_arch = "x86_64") {
|
||||
/// # example().unwrap()
|
||||
/// # } else {
|
||||
/// # assert!(example().is_none());
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn match_kind(&self) -> &MatchKind {
|
||||
self.patterns.match_kind()
|
||||
}
|
||||
|
||||
/// Returns the minimum length of a haystack that is required in order for
|
||||
/// packed searching to be effective.
|
||||
///
|
||||
/// In some cases, the underlying packed searcher may not be able to search
|
||||
/// very short haystacks. When that occurs, the implementation will defer
|
||||
/// to a slower non-packed searcher (which is still generally faster than
|
||||
/// Aho-Corasick for a small number of patterns). However, callers may
|
||||
/// want to avoid ever using the slower variant, which one can do by
|
||||
/// never passing a haystack shorter than the minimum length returned by
|
||||
/// this method.
|
||||
pub fn minimum_len(&self) -> usize {
|
||||
self.minimum_len
|
||||
}
|
||||
|
||||
/// Returns the approximate total amount of heap used by this searcher, in
|
||||
/// units of bytes.
|
||||
pub fn heap_bytes(&self) -> usize {
|
||||
self.patterns.heap_bytes()
|
||||
+ self.rabinkarp.heap_bytes()
|
||||
+ self.search_kind.heap_bytes()
|
||||
}
|
||||
|
||||
/// Use a slow (non-packed) searcher.
|
||||
///
|
||||
/// This is useful when a packed searcher could be constructed, but could
|
||||
/// not be used to search a specific haystack. For example, if Teddy was
|
||||
/// built but the haystack is smaller than ~34 bytes, then Teddy might not
|
||||
/// be able to run.
|
||||
fn slow_at(&self, haystack: &[u8], at: usize) -> Option<Match> {
|
||||
self.rabinkarp.find_at(&self.patterns, haystack, at)
|
||||
}
|
||||
}
|
||||
|
||||
impl SearchKind {
|
||||
fn heap_bytes(&self) -> usize {
|
||||
match *self {
|
||||
SearchKind::Teddy(ref ted) => ted.heap_bytes(),
|
||||
SearchKind::RabinKarp => 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over non-overlapping matches from a packed searcher.
///
/// The lifetime `'s` refers to the lifetime of the underlying
/// [`Searcher`](struct.Searcher.html), while the lifetime `'h` refers to the
/// lifetime of the haystack being searched.
#[derive(Debug)]
pub struct FindIter<'s, 'h> {
    /// The searcher that executes each step of the iteration.
    searcher: &'s Searcher,
    /// The haystack being searched.
    haystack: &'h [u8],
    /// The position at which the next search begins; advanced to each
    /// match's end offset as matches are found.
    at: usize,
}
|
||||
|
||||
impl<'s, 'h> Iterator for FindIter<'s, 'h> {
|
||||
type Item = Match;
|
||||
|
||||
fn next(&mut self) -> Option<Match> {
|
||||
if self.at > self.haystack.len() {
|
||||
return None;
|
||||
}
|
||||
match self.searcher.find_at(&self.haystack, self.at) {
|
||||
None => None,
|
||||
Some(c) => {
|
||||
self.at = c.end;
|
||||
Some(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
/*!
|
||||
A lower level API for packed multiple substring search, principally for a small
|
||||
number of patterns.
|
||||
|
||||
This sub-module provides vectorized routines for quickly finding matches of a
|
||||
small number of patterns. In general, users of this crate shouldn't need to
|
||||
interface with this module directly, as the primary
|
||||
[`AhoCorasick`](../struct.AhoCorasick.html)
|
||||
searcher will use these routines automatically as a prefilter when applicable.
|
||||
However, in some cases, callers may want to bypass the Aho-Corasick machinery
|
||||
entirely and use this vectorized searcher directly.
|
||||
|
||||
# Overview
|
||||
|
||||
The primary types in this sub-module are:
|
||||
|
||||
* [`Searcher`](struct.Searcher.html) executes the actual search algorithm to
|
||||
report matches in a haystack.
|
||||
* [`Builder`](struct.Builder.html) accumulates patterns incrementally and can
|
||||
construct a `Searcher`.
|
||||
* [`Config`](struct.Config.html) permits tuning the searcher, and itself will
|
||||
produce a `Builder` (which can then be used to build a `Searcher`).
|
||||
Currently, the only tuneable knob is the match semantics, but this may be
|
||||
expanded in the future.
|
||||
|
||||
# Examples
|
||||
|
||||
This example shows how to create a searcher from an iterator of patterns.
|
||||
By default, leftmost-first match semantics are used. (See the top-level
|
||||
[`MatchKind`](../enum.MatchKind.html) type for more details about match
|
||||
semantics, which apply similarly to packed substring search.)
|
||||
|
||||
```
|
||||
use aho_corasick::packed::{MatchKind, Searcher};
|
||||
|
||||
# fn example() -> Option<()> {
|
||||
let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||
let matches: Vec<usize> = searcher
|
||||
.find_iter("foobar")
|
||||
.map(|mat| mat.pattern())
|
||||
.collect();
|
||||
assert_eq!(vec![0], matches);
|
||||
# Some(()) }
|
||||
# if cfg!(target_arch = "x86_64") {
|
||||
# example().unwrap()
|
||||
# } else {
|
||||
# assert!(example().is_none());
|
||||
# }
|
||||
```
|
||||
|
||||
This example shows how to use [`Config`](struct.Config.html) to change the
|
||||
match semantics to leftmost-longest:
|
||||
|
||||
```
|
||||
use aho_corasick::packed::{Config, MatchKind};
|
||||
|
||||
# fn example() -> Option<()> {
|
||||
let searcher = Config::new()
|
||||
.match_kind(MatchKind::LeftmostLongest)
|
||||
.builder()
|
||||
.add("foo")
|
||||
.add("foobar")
|
||||
.build()?;
|
||||
let matches: Vec<usize> = searcher
|
||||
.find_iter("foobar")
|
||||
.map(|mat| mat.pattern())
|
||||
.collect();
|
||||
assert_eq!(vec![1], matches);
|
||||
# Some(()) }
|
||||
# if cfg!(target_arch = "x86_64") {
|
||||
# example().unwrap()
|
||||
# } else {
|
||||
# assert!(example().is_none());
|
||||
# }
|
||||
```
|
||||
|
||||
# Packed substring searching
|
||||
|
||||
Packed substring searching refers to the use of SIMD (Single Instruction,
|
||||
Multiple Data) to accelerate the detection of matches in a haystack. Unlike
|
||||
conventional algorithms, such as Aho-Corasick, SIMD algorithms for substring
|
||||
search tend to do better with a small number of patterns, whereas Aho-Corasick
|
||||
generally maintains reasonably consistent performance regardless of the number
|
||||
of patterns you give it. Because of this, the vectorized searcher in this
|
||||
sub-module cannot be used as a general purpose searcher, since building the
|
||||
searcher may fail. However, in exchange, when searching for a small number of
|
||||
patterns, searching can be quite a bit faster than Aho-Corasick (sometimes by
|
||||
an order of magnitude).
|
||||
|
||||
The key take away here is that constructing a searcher from a list of patterns
|
||||
is a fallible operation. While the precise conditions under which building a
|
||||
searcher can fail is specifically an implementation detail, here are some
|
||||
common reasons:
|
||||
|
||||
* Too many patterns were given. Typically, the limit is on the order of 100 or
|
||||
so, but this limit may fluctuate based on available CPU features.
|
||||
* The available packed algorithms require CPU features that aren't available.
|
||||
For example, currently, this crate only provides packed algorithms for
|
||||
`x86_64`. Therefore, constructing a packed searcher on any other target
|
||||
(e.g., ARM) will always fail.
|
||||
* Zero patterns were given, or one of the patterns given was empty. Packed
|
||||
searchers require at least one pattern and that all patterns are non-empty.
|
||||
* Something else about the nature of the patterns (typically based on
|
||||
heuristics) suggests that a packed searcher would perform very poorly, so
|
||||
no searcher is built.
|
||||
*/
|
||||
|
||||
pub use packed::api::{Builder, Config, FindIter, MatchKind, Searcher};
|
||||
|
||||
mod api;
|
||||
mod pattern;
|
||||
mod rabinkarp;
|
||||
mod teddy;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
mod vector;
|
|
@ -0,0 +1,318 @@
|
|||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::mem;
|
||||
use std::u16;
|
||||
use std::usize;
|
||||
|
||||
use packed::api::MatchKind;
|
||||
|
||||
/// The type used for representing a pattern identifier.
///
/// We don't use `usize` here because our packed searchers don't scale to
/// huge numbers of patterns, so we keep things a bit smaller.
///
/// Identifiers are assigned in insertion order, starting at `0` (see
/// `Patterns::add`).
pub type PatternID = u16;
|
||||
|
||||
/// A non-empty collection of non-empty patterns to search for.
///
/// This collection of patterns is what is passed around to both execute
/// searches and to construct the searchers themselves. Namely, this permits
/// searches to avoid copying all of the patterns, and allows us to keep only
/// one copy throughout all packed searchers.
///
/// Note that this collection is not a set. The same pattern can appear more
/// than once.
#[derive(Clone, Debug)]
pub struct Patterns {
    /// The match semantics supported by this collection of patterns.
    ///
    /// The match semantics determines the order of the iterator over patterns.
    /// For leftmost-first, patterns are provided in the same order as were
    /// provided by the caller. For leftmost-longest, patterns are provided in
    /// descending order of length, with ties broken by the order in which they
    /// were provided by the caller.
    kind: MatchKind,
    /// The collection of patterns, indexed by their identifier.
    by_id: Vec<Vec<u8>>,
    /// The order of patterns defined for iteration, given by pattern
    /// identifiers. The order of `by_id` and `order` is always the same for
    /// leftmost-first semantics, but may be different for leftmost-longest
    /// semantics.
    order: Vec<PatternID>,
    /// The length of the smallest pattern, in bytes.
    ///
    /// This is `usize::MAX` while the collection is empty.
    minimum_len: usize,
    /// The largest pattern identifier. This should always be equivalent to
    /// the number of patterns minus one in this collection.
    max_pattern_id: PatternID,
    /// The total number of pattern bytes across the entire collection. This
    /// is used for reporting total heap usage in constant time.
    total_pattern_bytes: usize,
}
|
||||
|
||||
impl Patterns {
|
||||
/// Create a new collection of patterns for the given match semantics. The
|
||||
/// ID of each pattern is the index of the pattern at which it occurs in
|
||||
/// the `by_id` slice.
|
||||
///
|
||||
/// If any of the patterns in the slice given are empty, then this panics.
|
||||
/// Similarly, if the number of patterns given is zero, then this also
|
||||
/// panics.
|
||||
pub fn new() -> Patterns {
|
||||
Patterns {
|
||||
kind: MatchKind::default(),
|
||||
by_id: vec![],
|
||||
order: vec![],
|
||||
minimum_len: usize::MAX,
|
||||
max_pattern_id: 0,
|
||||
total_pattern_bytes: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a pattern to this collection.
|
||||
///
|
||||
/// This panics if the pattern given is empty.
|
||||
pub fn add(&mut self, bytes: &[u8]) {
|
||||
assert!(!bytes.is_empty());
|
||||
assert!(self.by_id.len() <= u16::MAX as usize);
|
||||
|
||||
let id = self.by_id.len() as u16;
|
||||
self.max_pattern_id = id;
|
||||
self.order.push(id);
|
||||
self.by_id.push(bytes.to_vec());
|
||||
self.minimum_len = cmp::min(self.minimum_len, bytes.len());
|
||||
self.total_pattern_bytes += bytes.len();
|
||||
}
|
||||
|
||||
/// Set the match kind semantics for this collection of patterns.
|
||||
///
|
||||
/// If the kind is not set, then the default is leftmost-first.
|
||||
pub fn set_match_kind(&mut self, kind: MatchKind) {
|
||||
match kind {
|
||||
MatchKind::LeftmostFirst => {
|
||||
self.order.sort();
|
||||
}
|
||||
MatchKind::LeftmostLongest => {
|
||||
let (order, by_id) = (&mut self.order, &mut self.by_id);
|
||||
order.sort_by(|&id1, &id2| {
|
||||
by_id[id1 as usize]
|
||||
.len()
|
||||
.cmp(&by_id[id2 as usize].len())
|
||||
.reverse()
|
||||
});
|
||||
}
|
||||
MatchKind::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the number of patterns in this collection.
|
||||
///
|
||||
/// This is guaranteed to be greater than zero.
|
||||
pub fn len(&self) -> usize {
|
||||
self.by_id.len()
|
||||
}
|
||||
|
||||
/// Returns true if and only if this collection of patterns is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Returns the approximate total amount of heap used by these patterns, in
|
||||
/// units of bytes.
|
||||
pub fn heap_bytes(&self) -> usize {
|
||||
self.order.len() * mem::size_of::<PatternID>()
|
||||
+ self.by_id.len() * mem::size_of::<Vec<u8>>()
|
||||
+ self.total_pattern_bytes
|
||||
}
|
||||
|
||||
/// Clears all heap memory associated with this collection of patterns and
|
||||
/// resets all state such that it is a valid empty collection.
|
||||
pub fn reset(&mut self) {
|
||||
self.kind = MatchKind::default();
|
||||
self.by_id.clear();
|
||||
self.order.clear();
|
||||
self.minimum_len = usize::MAX;
|
||||
self.max_pattern_id = 0;
|
||||
}
|
||||
|
||||
/// Return the maximum pattern identifier in this collection. This can be
|
||||
/// useful in searchers for ensuring that the collection of patterns they
|
||||
/// are provided at search time and at build time have the same size.
|
||||
pub fn max_pattern_id(&self) -> PatternID {
|
||||
assert_eq!((self.max_pattern_id + 1) as usize, self.len());
|
||||
self.max_pattern_id
|
||||
}
|
||||
|
||||
/// Returns the length, in bytes, of the smallest pattern.
|
||||
///
|
||||
/// This is guaranteed to be at least one.
|
||||
pub fn minimum_len(&self) -> usize {
|
||||
self.minimum_len
|
||||
}
|
||||
|
||||
/// Returns the match semantics used by these patterns.
|
||||
pub fn match_kind(&self) -> &MatchKind {
|
||||
&self.kind
|
||||
}
|
||||
|
||||
/// Return the pattern with the given identifier. If such a pattern does
|
||||
/// not exist, then this panics.
|
||||
pub fn get(&self, id: PatternID) -> Pattern {
|
||||
Pattern(&self.by_id[id as usize])
|
||||
}
|
||||
|
||||
/// Return the pattern with the given identifier without performing bounds
|
||||
/// checks.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Callers must ensure that a pattern with the given identifier exists
|
||||
/// before using this method.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub unsafe fn get_unchecked(&self, id: PatternID) -> Pattern {
|
||||
Pattern(self.by_id.get_unchecked(id as usize))
|
||||
}
|
||||
|
||||
/// Return an iterator over all the patterns in this collection, in the
|
||||
/// order in which they should be matched.
|
||||
///
|
||||
/// Specifically, in a naive multi-pattern matcher, the following is
|
||||
/// guaranteed to satisfy the match semantics of this collection of
|
||||
/// patterns:
|
||||
///
|
||||
/// ```ignore
|
||||
/// for i in 0..haystack.len():
|
||||
/// for p in patterns.iter():
|
||||
/// if haystack[i..].starts_with(p.bytes()):
|
||||
/// return Match(p.id(), i, i + p.bytes().len())
|
||||
/// ```
|
||||
///
|
||||
/// Namely, among the patterns in a collection, if they are matched in
|
||||
/// the order provided by this iterator, then the result is guaranteed
|
||||
/// to satisfy the correct match semantics. (Either leftmost-first or
|
||||
/// leftmost-longest.)
|
||||
pub fn iter(&self) -> PatternIter {
|
||||
PatternIter { patterns: self, i: 0 }
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over the patterns in the `Patterns` collection.
///
/// The order of the patterns provided by this iterator is consistent with the
/// match semantics of the originating collection of patterns.
///
/// The lifetime `'p` corresponds to the lifetime of the collection of patterns
/// this is iterating over.
#[derive(Debug)]
pub struct PatternIter<'p> {
    /// The collection being iterated over.
    patterns: &'p Patterns,
    /// The current index into the collection's `order` vector.
    i: usize,
}
|
||||
|
||||
impl<'p> Iterator for PatternIter<'p> {
|
||||
type Item = (PatternID, Pattern<'p>);
|
||||
|
||||
fn next(&mut self) -> Option<(PatternID, Pattern<'p>)> {
|
||||
if self.i >= self.patterns.len() {
|
||||
return None;
|
||||
}
|
||||
let id = self.patterns.order[self.i];
|
||||
let p = self.patterns.get(id);
|
||||
self.i += 1;
|
||||
Some((id, p))
|
||||
}
|
||||
}
|
||||
|
||||
/// A pattern that is used in packed searching.
///
/// This is a thin wrapper around a pattern's bytes, borrowed (for lifetime
/// `'a`) from the owning `Patterns` collection.
#[derive(Clone)]
pub struct Pattern<'a>(&'a [u8]);
|
||||
|
||||
impl<'a> fmt::Debug for Pattern<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.debug_struct("Pattern")
|
||||
.field("lit", &String::from_utf8_lossy(&self.0))
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'p> Pattern<'p> {
    /// Returns the length of this pattern, in bytes.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns the bytes of this pattern.
    pub fn bytes(&self) -> &[u8] {
        &self.0
    }

    /// Returns the low nybbles (low 4 bits of each byte) of the first `len`
    /// bytes of this pattern.
    ///
    /// NOTE(review): if this pattern is shorter than `len`, then fewer than
    /// `len` nybbles are returned (no panic occurs — `take` simply stops at
    /// the end of the pattern).
    #[cfg(target_arch = "x86_64")]
    pub fn low_nybbles(&self, len: usize) -> Vec<u8> {
        let mut nybs = vec![];
        for &b in self.bytes().iter().take(len) {
            nybs.push(b & 0xF);
        }
        nybs
    }

    /// Returns true if this pattern is a prefix of the given bytes.
    #[inline(always)]
    pub fn is_prefix(&self, bytes: &[u8]) -> bool {
        self.len() <= bytes.len() && self.equals(&bytes[..self.len()])
    }

    /// Returns true if and only if this pattern equals the given bytes.
    #[inline(always)]
    pub fn equals(&self, bytes: &[u8]) -> bool {
        // Why not just use memcmp for this? Well, memcmp requires calling out
        // to libc, and this routine is called in fairly hot code paths. Other
        // than just calling out to libc, it also seems to result in worse
        // codegen. By rolling our own memcpy in pure Rust, it seems to appear
        // more friendly to the optimizer.
        //
        // This results in an improvement in just about every benchmark. Some
        // smaller than others, but in some cases, up to 30% faster.

        if self.len() != bytes.len() {
            return false;
        }
        // Patterns shorter than 8 bytes are compared byte-at-a-time, since
        // an 8-byte unaligned load would read out of bounds.
        if self.len() < 8 {
            for (&b1, &b2) in self.bytes().iter().zip(bytes) {
                if b1 != b2 {
                    return false;
                }
            }
            return true;
        }
        // When we have 8 or more bytes to compare, then proceed in chunks of
        // 8 at a time using unaligned loads.
        let mut p1 = self.bytes().as_ptr();
        let mut p2 = bytes.as_ptr();
        let p1end = self.bytes()[self.len() - 8..].as_ptr();
        let p2end = bytes[bytes.len() - 8..].as_ptr();
        // SAFETY: Via the conditional above, we know that both `p1` and `p2`
        // have the same length, so `p1 < p1end` implies that `p2 < p2end`.
        // Thus, derefencing both `p1` and `p2` in the loop below is safe.
        //
        // Moreover, we set `p1end` and `p2end` to be 8 bytes before the
        // actual end of `p1` and `p2`. Thus, the final dereference outside of
        // the loop is guaranteed to be valid. (The final 8-byte loads may
        // overlap bytes already compared by the loop; comparing bytes twice
        // is harmless.)
        //
        // Finally, we needn't worry about 64-bit alignment here, since we
        // do unaligned loads.
        unsafe {
            while p1 < p1end {
                let v1 = (p1 as *const u64).read_unaligned();
                let v2 = (p2 as *const u64).read_unaligned();
                if v1 != v2 {
                    return false;
                }
                p1 = p1.add(8);
                p2 = p2.add(8);
            }
            let v1 = (p1end as *const u64).read_unaligned();
            let v2 = (p2end as *const u64).read_unaligned();
            v1 == v2
        }
    }
}
|
|
@ -0,0 +1,185 @@
|
|||
use std::mem;
|
||||
|
||||
use packed::pattern::{PatternID, Patterns};
|
||||
use Match;
|
||||
|
||||
/// The type of the rolling hash used in the Rabin-Karp algorithm.
type Hash = usize;

/// The number of buckets to store our patterns in. We don't want this to be
/// too big in order to avoid wasting memory, but we don't want it to be too
/// small either to avoid spending too much time confirming literals.
///
/// The number of buckets MUST be a power of two. Otherwise, determining the
/// bucket from a hash will slow down the code considerably. Using a power
/// of two means `hash % NUM_BUCKETS` can compile down to a simple `and`
/// instruction (i.e., masking with `NUM_BUCKETS - 1`).
const NUM_BUCKETS: usize = 64;
|
||||
|
||||
/// An implementation of the Rabin-Karp algorithm. The main idea of this
/// algorithm is to maintain a rolling hash as it moves through the input, and
/// then check whether that hash corresponds to the same hash for any of the
/// patterns we're looking for.
///
/// A drawback of naively scaling Rabin-Karp to multiple patterns is that
/// it requires all of the patterns to be the same length, which in turn
/// corresponds to the number of bytes to hash. We adapt this to work for
/// multiple patterns of varying size by fixing the number of bytes to hash
/// to be the length of the smallest pattern. We also split the patterns into
/// several buckets to hopefully make the confirmation step faster.
///
/// Wikipedia has a decent explanation, if a bit heavy on the theory:
/// https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
///
/// But ESMAJ provides something a bit more concrete:
/// http://www-igm.univ-mlv.fr/~lecroq/string/node5.html
#[derive(Clone, Debug)]
pub struct RabinKarp {
    /// The order of patterns in each bucket is significant. Namely, they are
    /// arranged such that the first one to match is the correct match. This
    /// may not necessarily correspond to the order provided by the caller.
    /// For example, if leftmost-longest semantics are used, then the patterns
    /// are sorted by their length in descending order. If leftmost-first
    /// semantics are used, then the patterns are sorted by their pattern ID
    /// in ascending order (which corresponds to the caller's order).
    buckets: Vec<Vec<(Hash, PatternID)>>,
    /// The length of the hashing window. Generally, this corresponds to the
    /// length of the smallest pattern.
    hash_len: usize,
    /// The factor to subtract out of a hash before updating it with a new
    /// byte.
    hash_2pow: usize,
    /// The maximum identifier of a pattern. This is used as a sanity check
    /// to ensure that the patterns provided by the caller are the same as
    /// the patterns that were used to compile the matcher. This sanity check
    /// possibly permits safely eliminating bounds checks regardless of what
    /// patterns are provided by the caller.
    ///
    /// (Currently, we don't use this to elide bounds checks since it doesn't
    /// result in a measurable performance improvement, but we do use it for
    /// better failure modes.)
    max_pattern_id: PatternID,
}
|
||||
|
||||
impl RabinKarp {
    /// Compile a new Rabin-Karp matcher from the patterns given.
    ///
    /// This panics if any of the patterns in the collection are empty, or if
    /// the collection is itself empty.
    pub fn new(patterns: &Patterns) -> RabinKarp {
        assert!(patterns.len() >= 1);
        let hash_len = patterns.minimum_len();
        assert!(hash_len >= 1);

        // `hash_2pow` ends up as 2^(hash_len-1), matching the fact that
        // `hash` multiplies by 2 (shifts left by 1) once per byte hashed.
        // It is the factor by which the oldest byte in the window was
        // scaled, and is subtracted back out in `update_hash`.
        let mut hash_2pow = 1usize;
        for _ in 1..hash_len {
            hash_2pow = hash_2pow.wrapping_shl(1);
        }

        let mut rk = RabinKarp {
            buckets: vec![vec![]; NUM_BUCKETS],
            hash_len,
            hash_2pow,
            max_pattern_id: patterns.max_pattern_id(),
        };
        // Hash the first `hash_len` bytes of every pattern and drop each
        // pattern into the bucket selected by its hash. Iteration order is
        // preserved within a bucket, which encodes match priority.
        for (id, pat) in patterns.iter() {
            let hash = rk.hash(&pat.bytes()[..rk.hash_len]);
            let bucket = hash % NUM_BUCKETS;
            rk.buckets[bucket].push((hash, id));
        }
        rk
    }

    /// Return the first matching pattern in the given haystack, beginning the
    /// search at `at`.
    pub fn find_at(
        &self,
        patterns: &Patterns,
        haystack: &[u8],
        mut at: usize,
    ) -> Option<Match> {
        assert_eq!(NUM_BUCKETS, self.buckets.len());
        assert_eq!(
            self.max_pattern_id,
            patterns.max_pattern_id(),
            "Rabin-Karp must be called with same patterns it was built with",
        );

        // Not enough haystack left to fill even one hashing window.
        if at + self.hash_len > haystack.len() {
            return None;
        }
        let mut hash = self.hash(&haystack[at..at + self.hash_len]);
        loop {
            // Candidate check: any pattern in this bucket whose full hash
            // matches must still be verified against the actual bytes.
            let bucket = &self.buckets[hash % NUM_BUCKETS];
            for &(phash, pid) in bucket {
                if phash == hash {
                    if let Some(c) = self.verify(patterns, pid, haystack, at) {
                        return Some(c);
                    }
                }
            }
            if at + self.hash_len >= haystack.len() {
                return None;
            }
            // Roll the hash one byte forward: remove `haystack[at]`, add
            // the next byte entering the window.
            hash = self.update_hash(
                hash,
                haystack[at],
                haystack[at + self.hash_len],
            );
            at += 1;
        }
    }

    /// Returns the approximate total amount of heap used by this searcher, in
    /// units of bytes.
    pub fn heap_bytes(&self) -> usize {
        let num_patterns = self.max_pattern_id as usize + 1;
        self.buckets.len() * mem::size_of::<Vec<(Hash, PatternID)>>()
            + num_patterns * mem::size_of::<(Hash, PatternID)>()
    }

    /// Verify whether the pattern with the given id matches at
    /// `haystack[at..]`.
    ///
    /// We tag this function as `cold` because it helps improve codegen.
    /// Intuitively, it would seem like inlining it would be better. However,
    /// the only time this is called and a match is not found is when there
    /// is a hash collision, or when a prefix of a pattern matches but
    /// the entire pattern doesn't match. This is hopefully fairly rare, and
    /// if it does occur a lot, it's going to be slow no matter what we do.
    #[cold]
    fn verify(
        &self,
        patterns: &Patterns,
        id: PatternID,
        haystack: &[u8],
        at: usize,
    ) -> Option<Match> {
        let pat = patterns.get(id);
        if pat.is_prefix(&haystack[at..]) {
            Some(Match::from_span(id as usize, at, at + pat.len()))
        } else {
            None
        }
    }

    /// Hash the given bytes.
    ///
    /// The hash is `b[0]*2^(n-1) + b[1]*2^(n-2) + ... + b[n-1]`, with all
    /// arithmetic wrapping. Using a multiplier of 2 keeps both hashing and
    /// the rolling update cheap (shifts instead of multiplies).
    fn hash(&self, bytes: &[u8]) -> Hash {
        assert_eq!(self.hash_len, bytes.len());

        let mut hash = 0usize;
        for &b in bytes {
            hash = hash.wrapping_shl(1).wrapping_add(b as usize);
        }
        hash
    }

    /// Update the hash given based on removing `old_byte` at the beginning
    /// of some byte string, and appending `new_byte` to the end of that same
    /// byte string.
    fn update_hash(&self, prev: Hash, old_byte: u8, new_byte: u8) -> Hash {
        // Subtract the oldest byte's contribution (it was scaled by
        // `hash_2pow` = 2^(hash_len-1)), then shift everything up one and
        // fold in the new byte.
        prev.wrapping_sub((old_byte as usize).wrapping_mul(self.hash_2pow))
            .wrapping_shl(1)
            .wrapping_add(new_byte as usize)
    }
}
|
|
@ -0,0 +1,386 @@
|
|||
Teddy is a simd accelerated multiple substring matching algorithm. The name
|
||||
and the core ideas in the algorithm were learned from the [Hyperscan][1_u]
|
||||
project. The implementation in this repository was mostly motivated for use in
|
||||
accelerating regex searches by searching for small sets of required literals
|
||||
extracted from the regex.
|
||||
|
||||
|
||||
# Background
|
||||
|
||||
The key idea of Teddy is to do *packed* substring matching. In the literature,
|
||||
packed substring matching is the idea of examining multiple bytes in a haystack
|
||||
at a time to detect matches. Implementations of, for example, memchr (which
|
||||
detects matches of a single byte) have been doing this for years. Only
|
||||
recently, with the introduction of various SIMD instructions, has this been
|
||||
extended to substring matching. The PCMPESTRI instruction (and its relatives),
|
||||
for example, implements substring matching in hardware. It is, however, limited
|
||||
to substrings of length 16 bytes or fewer, but this restriction is fine in a
|
||||
regex engine, since we rarely care about the performance difference between
|
||||
searching for a 16 byte literal and a 16 + N literal; 16 is already long
|
||||
enough. The key downside of the PCMPESTRI instruction, on current (2016) CPUs
|
||||
at least, is its latency and throughput. As a result, it is often faster to
|
||||
do substring search with a Boyer-Moore (or Two-Way) variant and a well placed
|
||||
memchr to quickly skip through the haystack.
|
||||
|
||||
There are fewer results from the literature on packed substring matching,
|
||||
and even fewer for packed multiple substring matching. Ben-Kiki et al. [2a]
|
||||
describes use of PCMPESTRI for substring matching, but is mostly theoretical
|
||||
and hand-waves performance. There is other theoretical work done by Bille [3]
|
||||
as well.
|
||||
|
||||
The rest of the work in the field, as far as I'm aware, is by Faro and Kulekci
|
||||
and is generally focused on multiple pattern search. Their first paper [4a]
|
||||
introduces the concept of a fingerprint, which is computed for every block of
|
||||
N bytes in every pattern. The haystack is then scanned N bytes at a time and
|
||||
a fingerprint is computed in the same way it was computed for blocks in the
|
||||
patterns. If the fingerprint corresponds to one that was found in a pattern,
|
||||
then a verification step follows to confirm that one of the substrings with the
|
||||
corresponding fingerprint actually matches at the current location. Various
|
||||
implementation tricks are employed to make sure the fingerprint lookup is fast;
|
||||
typically by truncating the fingerprint. (This may, of course, provoke more
|
||||
steps in the verification process, so a balance must be struck.)
|
||||
|
||||
The main downside of [4a] is that the minimum substring length is 32 bytes,
|
||||
presumably because of how the algorithm uses certain SIMD instructions. This
|
||||
essentially makes it useless for general purpose regex matching, where a small
|
||||
number of short patterns is far more likely.
|
||||
|
||||
Faro and Kulekci published another paper [4b] that is conceptually very similar
|
||||
to [4a]. The key difference is that it uses the CRC32 instruction (introduced
|
||||
as part of SSE 4.2) to compute fingerprint values. This also enables the
|
||||
algorithm to work effectively on substrings as short as 7 bytes with 4 byte
|
||||
windows. 7 bytes is unfortunately still too long. The window could be
|
||||
technically shrunk to 2 bytes, thereby reducing minimum length to 3, but the
|
||||
small window size ends up negating most performance benefits—and it's likely
|
||||
the common case in a general purpose regex engine.
|
||||
|
||||
Faro and Kulekci also published [4c] that appears to be intended as a
|
||||
replacement to using PCMPESTRI. In particular, it is specifically motivated by
|
||||
the high throughput/latency time of PCMPESTRI and therefore chooses other SIMD
|
||||
instructions that are faster. While this approach works for short substrings,
|
||||
I personally couldn't see a way to generalize it to multiple substring search.
|
||||
|
||||
Faro and Kulekci have another paper [4d] that I haven't been able to read
|
||||
because it is behind a paywall.
|
||||
|
||||
|
||||
# Teddy
|
||||
|
||||
Finally, we get to Teddy. If the above literature review is complete, then it
|
||||
appears that Teddy is a novel algorithm. More than that, in my experience, it
|
||||
completely blows away the competition for short substrings, which is exactly
|
||||
what we want in a general purpose regex engine. Again, the algorithm appears
|
||||
to be developed by the authors of [Hyperscan][1_u]. Hyperscan was open sourced
|
||||
late 2015, and no earlier history could be found. Therefore, tracking the exact
|
||||
provenance of the algorithm with respect to the published literature seems
|
||||
difficult.
|
||||
|
||||
At a high level, Teddy works somewhat similarly to the fingerprint algorithms
|
||||
published by Faro and Kulekci, but Teddy does it in a way that scales a bit
|
||||
better. Namely:
|
||||
|
||||
1. Teddy's core algorithm scans the haystack in 16 (for SSE, or 32 for AVX)
|
||||
byte chunks. 16 (or 32) is significant because it corresponds to the number
|
||||
of bytes in a SIMD vector.
|
||||
2. Bitwise operations are performed on each chunk to discover if any region of
|
||||
it matches a set of precomputed fingerprints from the patterns. If there are
|
||||
matches, then a verification step is performed. In this implementation, our
|
||||
verification step is naive. This can be improved upon.
|
||||
|
||||
The details to make this work are quite clever. First, we must choose how to
|
||||
pick our fingerprints. In Hyperscan's implementation, I *believe* they use the
|
||||
last N bytes of each substring, where N must be at least the minimum length of
|
||||
any substring in the set being searched. In this implementation, we use the
|
||||
first N bytes of each substring. (The tradeoffs between these choices aren't
|
||||
yet clear to me.) We then must figure out how to quickly test whether an
|
||||
occurrence of any fingerprint from the set of patterns appears in a 16 byte
|
||||
block from the haystack. To keep things simple, let's assume N = 1 and examine
|
||||
some examples to motivate the approach. Here are our patterns:
|
||||
|
||||
```ignore
|
||||
foo
|
||||
bar
|
||||
baz
|
||||
```
|
||||
|
||||
The corresponding fingerprints, for N = 1, are `f`, `b` and `b`. Now let's set
|
||||
our 16 byte block to:
|
||||
|
||||
```ignore
|
||||
bat cat foo bump
|
||||
xxxxxxxxxxxxxxxx
|
||||
```
|
||||
|
||||
To cut to the chase, Teddy works by using bitsets. In particular, Teddy creates
|
||||
a mask that allows us to quickly compute membership of a fingerprint in a 16
|
||||
byte block that also tells which pattern the fingerprint corresponds to. In
|
||||
this case, our fingerprint is a single byte, so an appropriate abstraction is
|
||||
a map from a single byte to a list of patterns that contain that fingerprint:
|
||||
|
||||
```ignore
|
||||
f |--> foo
|
||||
b |--> bar, baz
|
||||
```
|
||||
|
||||
Now, all we need to do is figure out how to represent this map in vector space
|
||||
and use normal SIMD operations to perform a lookup. The first simplification
|
||||
we can make is to represent our patterns as bit fields occupying a single
|
||||
byte. This is important, because a single SIMD vector can store 16 bytes.
|
||||
|
||||
```ignore
|
||||
f |--> 00000001
|
||||
b |--> 00000010, 00000100
|
||||
```
|
||||
|
||||
How do we perform lookup though? It turns out that SSSE3 introduced a very cool
|
||||
instruction called PSHUFB. The instruction takes two SIMD vectors, `A` and `B`,
|
||||
and returns a third vector `C`. All vectors are treated as 16 8-bit integers.
|
||||
`C` is formed by `C[i] = A[B[i]]`. (This is a bit of a simplification, but true
|
||||
for the purposes of this algorithm. For full details, see [Intel's Intrinsics
|
||||
Guide][5_u].) This essentially lets us use the values in `B` to lookup values
|
||||
in `A`.
|
||||
|
||||
If we could somehow cause `B` to contain our 16 byte block from the haystack,
|
||||
and if `A` could contain our bitmasks, then we'd end up with something like
|
||||
this for `A`:
|
||||
|
||||
```ignore
|
||||
0x00 0x01 ... 0x62 ... 0x66 ... 0xFF
|
||||
A = 0 0 00000110 00000001 0
|
||||
```
|
||||
|
||||
And if `B` contains our window from our haystack, we could use shuffle to take
|
||||
the values from `B` and use them to look up our bitsets in `A`. But of course,
|
||||
we can't do this because `A` in the above example contains 256 bytes, which
|
||||
is much larger than the size of a SIMD vector.
|
||||
|
||||
Nybbles to the rescue! A nybble is 4 bits. Instead of one mask to hold all of
|
||||
our bitsets, we can use two masks, where one mask corresponds to the lower four
|
||||
bits of our fingerprint and the other mask corresponds to the upper four bits.
|
||||
So our map now looks like:
|
||||
|
||||
```ignore
|
||||
'f' & 0xF = 0x6 |--> 00000001
|
||||
'f' >> 4 = 0x6 |--> 00000111
|
||||
'b' & 0xF = 0x2 |--> 00000110
|
||||
'b' >> 4 = 0x6 |--> 00000111
|
||||
```
|
||||
|
||||
Notice that the bitsets for each nybble correspond to the union of all
|
||||
fingerprints that contain that nybble. For example, both `f` and `b` have the
|
||||
same upper 4 bits but differ on the lower 4 bits. Putting this together, we
|
||||
have `A0`, `A1` and `B`, where `A0` is our mask for the lower nybble, `A1` is
|
||||
our mask for the upper nybble and `B` is our 16 byte block from the haystack:
|
||||
|
||||
```ignore
|
||||
0x00 0x01 0x02 0x03 ... 0x06 ... 0xF
|
||||
A0 = 0 0 00000110 0 00000001 0
|
||||
A1 = 0 0 0 0 00000111 0
|
||||
B = b a t _ t p
|
||||
B = 0x62 0x61 0x74 0x20 0x74 0x70
|
||||
```
|
||||
|
||||
But of course, we can't use `B` with `PSHUFB` yet, since its values are 8 bits,
|
||||
and we need indexes that are at most 4 bits (corresponding to one of 16
|
||||
values). We can apply the same transformation to split `B` into lower and upper
|
||||
nybbles as we did `A`. As before, `B0` corresponds to the lower nybbles and
|
||||
`B1` corresponds to the upper nybbles:
|
||||
|
||||
```ignore
|
||||
b a t _ c a t _ f o o _ b u m p
|
||||
B0 = 0x2 0x1 0x4 0x0 0x3 0x1 0x4 0x0 0x6 0xF 0xF 0x0 0x2 0x5 0xD 0x0
|
||||
B1 = 0x6 0x6 0x7 0x2 0x6 0x6 0x7 0x2 0x6 0x6 0x6 0x2 0x6 0x7 0x6 0x7
|
||||
```
|
||||
|
||||
And now we have a nice correspondence. `B0` can index `A0` and `B1` can index
|
||||
`A1`. Here's what we get when we apply `C0 = PSHUFB(A0, B0)`:
|
||||
|
||||
```ignore
|
||||
b a ... f o ... p
|
||||
A0[0x2] A0[0x1] A0[0x6] A0[0xF] A0[0x0]
|
||||
C0 = 00000110 0 00000001 0 0
|
||||
```
|
||||
|
||||
And `C1 = PSHUFB(A1, B1)`:
|
||||
|
||||
```ignore
|
||||
b a ... f o ... p
|
||||
A1[0x6] A1[0x6] A1[0x6] A1[0x6] A1[0x7]
|
||||
C1 = 00000111 00000111 00000111 00000111 0
|
||||
```
|
||||
|
||||
Notice how neither one of `C0` or `C1` is guaranteed to report fully correct
|
||||
results all on its own. For example, `C1` claims that `b` is a fingerprint for
|
||||
the pattern `foo` (since `A1[0x6] = 00000111`), and that `o` is a fingerprint
|
||||
for all of our patterns. But if we combined `C0` and `C1` with an `AND`
|
||||
operation:
|
||||
|
||||
```ignore
|
||||
b a ... f o ... p
|
||||
C = 00000110 0 00000001 0 0
|
||||
```
|
||||
|
||||
Then we now have that `C[i]` contains a bitset corresponding to the matching
|
||||
fingerprints in a haystack's 16 byte block, where `i` is the `ith` byte in that
|
||||
block.
|
||||
|
||||
Once we have that, we can look for the position of the least significant bit
|
||||
in `C`. (Least significant because we only target `x86_64` here, which is
|
||||
always little endian. Thus, the least significant bytes correspond to bytes
|
||||
in our haystack at a lower address.) That position, modulo `8`, gives us
|
||||
the pattern that the fingerprint matches. That position, integer divided by
|
||||
`8`, also gives us the byte offset that the fingerprint occurs in inside the
|
||||
16 byte haystack block. Using those two pieces of information, we can run a
|
||||
verification procedure that tries to match all substrings containing that
|
||||
fingerprint at that position in the haystack.
|
||||
|
||||
|
||||
# Implementation notes
|
||||
|
||||
The problem with the algorithm as described above is that it uses a single byte
|
||||
for a fingerprint. This will work well if the fingerprints are rare in the
|
||||
haystack (e.g., capital letters or special characters in normal English text),
|
||||
but if the fingerprints are common, you'll wind up spending too much time in
|
||||
the verification step, which effectively negates the performance benefits of
|
||||
scanning 16 bytes at a time. Remember, the key to the performance of this
|
||||
algorithm is to do as little work as possible per 16 (or 32) bytes.
|
||||
|
||||
This algorithm can be extrapolated in a relatively straight-forward way to use
|
||||
larger fingerprints. That is, instead of a single byte prefix, we might use a
|
||||
two or three byte prefix. The implementation here implements N = {1, 2, 3}
|
||||
and always picks the largest N possible. The rationale is that the bigger the
|
||||
fingerprint, the fewer verification steps we'll do. Of course, if N is too
|
||||
large, then we'll end up doing too much on each step.
|
||||
|
||||
The way to extend it is:
|
||||
|
||||
1. Add a mask for each byte in the fingerprint. (Remember that each mask is
|
||||
composed of two SIMD vectors.) This results in a value of `C` for each byte
|
||||
in the fingerprint while searching.
|
||||
2. When testing each 16 (or 32) byte block, each value of `C` must be shifted
|
||||
so that they are aligned. Once aligned, they should all be `AND`'d together.
|
||||
This will give you only the bitsets corresponding to the full match of the
|
||||
fingerprint. To do this, one needs to save the last byte (for N=2) or last
|
||||
two bytes (for N=3) from the previous iteration, and then line them up with
|
||||
the first one or two bytes of the next iteration.
|
||||
|
||||
## Verification
|
||||
|
||||
Verification generally follows the procedure outlined above. The tricky parts
|
||||
are in the right formulation of operations to get our bits out of our vectors.
|
||||
We have a limited set of operations available to us on SIMD vectors as 128-bit
|
||||
or 256-bit numbers, so we wind up needing to rip out 2 (or 4) 64-bit integers
|
||||
from our vectors, and then run our verification step on each of those. The
|
||||
verification step looks at the least significant bit set, and from its
|
||||
position, we can derive the byte offset and bucket. (Again, as described
|
||||
above.) Once we know the bucket, we do a fairly naive exhaustive search for
|
||||
every literal in that bucket. (Hyperscan is a bit smarter here and uses a hash
|
||||
table, but I haven't had time to thoroughly explore that. A few initial
|
||||
half-hearted attempts resulted in worse performance.)
|
||||
|
||||
## AVX
|
||||
|
||||
The AVX version of Teddy extrapolates almost perfectly from the SSE version.
|
||||
The only hiccup is that PALIGNR is used to align chunks in the 16-bit version,
|
||||
and there is no equivalent instruction in AVX. AVX does have VPALIGNR, but it
|
||||
only works within 128-bit lanes. So there's a bit of tomfoolery to get around
|
||||
this by shuffling the vectors before calling VPALIGNR.
|
||||
|
||||
The only other aspect to AVX is that since our masks are still fundamentally
|
||||
16-bytes (0x0-0xF), they are duplicated to 32-bytes, so that they can apply to
|
||||
32-byte chunks.
|
||||
|
||||
## Fat Teddy
|
||||
|
||||
In the version of Teddy described above, 8 buckets are used to group patterns
|
||||
that we want to search for. However, when AVX is available, we can extend the
|
||||
number of buckets to 16 by permitting each byte in our masks to use 16-bits
|
||||
instead of 8-bits to represent the buckets it belongs to. (This variant is also
|
||||
in Hyperscan.) However, what we give up is the ability to scan 32 bytes at a
|
||||
time, even though we're using AVX. Instead, we have to scan 16 bytes at a time.
|
||||
What we gain, though, is (hopefully) less work in our verification routine.
|
||||
If patterns are more spread out across more buckets, then there should overall
|
||||
be fewer false positives. In general, Fat Teddy permits us to grow our capacity
|
||||
a bit and search for more literals before Teddy gets overwhelmed.
|
||||
|
||||
The tricky part of Fat Teddy is in how we adjust our masks and our verification
|
||||
procedure. For the masks, we simply represent the first 8 buckets in each of
|
||||
the low 16 bytes, and then the second 8 buckets in each of the high 16 bytes.
|
||||
Then, in the search loop, instead of loading 32 bytes from the haystack, we
|
||||
load the same 16 bytes from the haystack into both the low and high 16 byte
|
||||
portions of our 256-bit vector. So for example, a mask might look like this:
|
||||
|
||||
bits: 00100001 00000000 ... 11000000 00000000 00000001 ... 00000000
|
||||
byte: 31 30 16 15 14 0
|
||||
offset: 15 14 0 15 14 0
|
||||
buckets: 8-15 8-15 8-15 0-7 0-7 0-7
|
||||
|
||||
Where `byte` is the position in the vector (higher numbers corresponding to
|
||||
more significant bits), `offset` is the corresponding position in the haystack
|
||||
chunk, and `buckets` corresponds to the bucket assignments for that particular
|
||||
byte.
|
||||
|
||||
In particular, notice that the bucket assignments for offset `0` are spread
|
||||
out between bytes `0` and `16`. This works well for the chunk-by-chunk search
|
||||
procedure, but verification really wants to process all bucket assignments for
|
||||
each offset at once. Otherwise, we might wind up finding a match at offset
|
||||
`1` in one of the first 8 buckets, when we really should have reported a match
|
||||
at offset `0` in one of the second 8 buckets. (Because we want the leftmost
|
||||
match.)
|
||||
|
||||
Thus, for verification, we rearrange the above vector such that it is a
|
||||
sequence of 16-bit integers, where the least significant 16-bit integer
|
||||
corresponds to all of the bucket assignments for offset `0`. So with the
|
||||
above vector, the least significant 16-bit integer would be
|
||||
|
||||
    11000000 00000000
|
||||
|
||||
which was taken from bytes `16` and `0`. Then the verification step pretty much
|
||||
runs as described, except with 16 buckets instead of 8.
|
||||
|
||||
|
||||
# References
|
||||
|
||||
- **[1]** [Hyperscan on GitHub](https://github.com/01org/hyperscan),
|
||||
[webpage](https://01.org/hyperscan)
|
||||
- **[2a]** Ben-Kiki, O., Bille, P., Breslauer, D., Gasieniec, L., Grossi, R.,
|
||||
& Weimann, O. (2011).
|
||||
_Optimal packed string matching_.
|
||||
In LIPIcs-Leibniz International Proceedings in Informatics (Vol. 13).
|
||||
Schloss Dagstuhl-Leibniz-Zentrum fuer Informatik.
|
||||
DOI: 10.4230/LIPIcs.FSTTCS.2011.423.
|
||||
[PDF](http://drops.dagstuhl.de/opus/volltexte/2011/3355/pdf/37.pdf).
|
||||
- **[2b]** Ben-Kiki, O., Bille, P., Breslauer, D., Ga̧sieniec, L., Grossi, R.,
|
||||
& Weimann, O. (2014).
|
||||
_Towards optimal packed string matching_.
|
||||
Theoretical Computer Science, 525, 111-129.
|
||||
DOI: 10.1016/j.tcs.2013.06.013.
|
||||
[PDF](http://www.cs.haifa.ac.il/~oren/Publications/bpsm.pdf).
|
||||
- **[3]** Bille, P. (2011).
|
||||
_Fast searching in packed strings_.
|
||||
Journal of Discrete Algorithms, 9(1), 49-56.
|
||||
DOI: 10.1016/j.jda.2010.09.003.
|
||||
[PDF](http://www.sciencedirect.com/science/article/pii/S1570866710000353).
|
||||
- **[4a]** Faro, S., & Külekci, M. O. (2012, October).
|
||||
_Fast multiple string matching using streaming SIMD extensions technology_.
|
||||
In String Processing and Information Retrieval (pp. 217-228).
|
||||
Springer Berlin Heidelberg.
|
||||
DOI: 10.1007/978-3-642-34109-0_23.
|
||||
[PDF](http://www.dmi.unict.it/~faro/papers/conference/faro32.pdf).
|
||||
- **[4b]** Faro, S., & Külekci, M. O. (2013, September).
|
||||
_Towards a Very Fast Multiple String Matching Algorithm for Short Patterns_.
|
||||
In Stringology (pp. 78-91).
|
||||
[PDF](http://www.dmi.unict.it/~faro/papers/conference/faro36.pdf).
|
||||
- **[4c]** Faro, S., & Külekci, M. O. (2013, January).
|
||||
_Fast packed string matching for short patterns_.
|
||||
  In Proceedings of the Meeting on Algorithm Engineering & Experiments
|
||||
(pp. 113-121).
|
||||
Society for Industrial and Applied Mathematics.
|
||||
[PDF](http://arxiv.org/pdf/1209.6449.pdf).
|
||||
- **[4d]** Faro, S., & Külekci, M. O. (2014).
|
||||
_Fast and flexible packed string matching_.
|
||||
Journal of Discrete Algorithms, 28, 61-72.
|
||||
DOI: 10.1016/j.jda.2014.07.003.
|
||||
|
||||
[1_u]: https://github.com/01org/hyperscan
|
||||
[5_u]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
|
|
@ -0,0 +1,414 @@
|
|||
// See the README in this directory for an explanation of the Teddy algorithm.
|
||||
|
||||
use std::cmp;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
|
||||
use packed::pattern::{PatternID, Patterns};
|
||||
use packed::teddy::Teddy;
|
||||
|
||||
/// A builder for constructing a Teddy matcher.
///
/// The builder primarily permits fine grained configuration of the Teddy
/// matcher. Most options are made only available for testing/benchmarking
/// purposes. In reality, options are automatically determined by the nature
/// and number of patterns given to the builder.
#[derive(Clone, Debug)]
pub struct Builder {
    /// When none, this is automatically determined. Otherwise, `false` means
    /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used
    /// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't
    /// available and Fat Teddy was requested, no matcher will be built.
    fat: Option<bool>,
    /// When none, this is automatically determined. Otherwise, `false` means
    /// that 128-bit vectors will be used (up to SSSE3 instructions) whereas
    /// `true` means that 256-bit vectors will be used. As with `fat`, if
    /// 256-bit vectors are requested and they aren't available, then a
    /// searcher will not be built.
    avx: Option<bool>,
}
|
||||
|
||||
impl Default for Builder {
    /// The default builder is equivalent to `Builder::new()`: both options
    /// are unset and therefore determined automatically at build time.
    fn default() -> Builder {
        Builder::new()
    }
}
|
||||
|
||||
impl Builder {
|
||||
/// Create a new builder for configuring a Teddy matcher.
|
||||
pub fn new() -> Builder {
|
||||
Builder { fat: None, avx: None }
|
||||
}
|
||||
|
||||
/// Build a matcher for the set of patterns given. If a matcher could not
/// be built, then `None` is returned.
///
/// Generally, a matcher isn't built if the necessary CPU features aren't
/// available, if the target is unsupported, or if the searcher is believed
/// to be slower than standard techniques (i.e., if there are too many
/// literals).
pub fn build(&self, patterns: &Patterns) -> Option<Teddy> {
    self.build_imp(patterns)
}
|
||||
|
||||
/// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses
/// 16 buckets whereas Slim Teddy uses 8 buckets. More buckets are useful
/// for a larger set of literals.
///
/// `None` is the default, which results in an automatic selection based
/// on the number of literals and available CPU features.
pub fn fat(&mut self, yes: Option<bool>) -> &mut Builder {
    self.fat = yes;
    self
}
|
||||
|
||||
/// Request the use of 256-bit vectors (true) or 128-bit vectors (false).
|
||||
/// Generally, a larger vector size is better since it either permits
|
||||
/// matching more patterns or matching more bytes in the haystack at once.
|
||||
///
|
||||
/// `None` is the default, which results in an automatic selection based on
|
||||
/// the number of literals and available CPU features.
|
||||
pub fn avx(&mut self, yes: Option<bool>) -> &mut Builder {
|
||||
self.avx = yes;
|
||||
self
|
||||
}
|
||||
|
||||
fn build_imp(&self, patterns: &Patterns) -> Option<Teddy> {
|
||||
use packed::teddy::runtime;
|
||||
|
||||
// Most of the logic here is just about selecting the optimal settings,
|
||||
// or perhaps even rejecting construction altogether. The choices
|
||||
// we have are: fat (avx only) or not, ssse3 or avx2, and how many
|
||||
// patterns we allow ourselves to search. Additionally, for testing
|
||||
// and benchmarking, we permit callers to try to "force" a setting,
|
||||
// and if the setting isn't allowed (e.g., forcing AVX when AVX isn't
|
||||
// available), then we bail and return nothing.
|
||||
|
||||
if patterns.len() > 64 {
|
||||
return None;
|
||||
}
|
||||
let has_ssse3 = is_x86_feature_detected!("ssse3");
|
||||
let has_avx = is_x86_feature_detected!("avx2");
|
||||
let avx = if self.avx == Some(true) {
|
||||
if !has_avx {
|
||||
return None;
|
||||
}
|
||||
true
|
||||
} else if self.avx == Some(false) {
|
||||
if !has_ssse3 {
|
||||
return None;
|
||||
}
|
||||
false
|
||||
} else if !has_ssse3 && !has_avx {
|
||||
return None;
|
||||
} else {
|
||||
has_avx
|
||||
};
|
||||
let fat = match self.fat {
|
||||
None => avx && patterns.len() > 32,
|
||||
Some(false) => false,
|
||||
Some(true) if !avx => return None,
|
||||
Some(true) => true,
|
||||
};
|
||||
|
||||
let mut compiler = Compiler::new(patterns, fat);
|
||||
compiler.compile();
|
||||
let Compiler { buckets, masks, .. } = compiler;
|
||||
// SAFETY: It is required that the builder only produce Teddy matchers
|
||||
// that are allowed to run on the current CPU, since we later assume
|
||||
// that the presence of (for example) TeddySlim1Mask256 means it is
|
||||
// safe to call functions marked with the `avx2` target feature.
|
||||
match (masks.len(), avx, fat) {
|
||||
(1, false, _) => Some(Teddy {
|
||||
buckets: buckets,
|
||||
max_pattern_id: patterns.max_pattern_id(),
|
||||
exec: runtime::Exec::TeddySlim1Mask128(
|
||||
runtime::TeddySlim1Mask128 {
|
||||
mask1: runtime::Mask128::new(masks[0]),
|
||||
},
|
||||
),
|
||||
}),
|
||||
(1, true, false) => Some(Teddy {
|
||||
buckets: buckets,
|
||||
max_pattern_id: patterns.max_pattern_id(),
|
||||
exec: runtime::Exec::TeddySlim1Mask256(
|
||||
runtime::TeddySlim1Mask256 {
|
||||
mask1: runtime::Mask256::new(masks[0]),
|
||||
},
|
||||
),
|
||||
}),
|
||||
(1, true, true) => Some(Teddy {
|
||||
buckets: buckets,
|
||||
max_pattern_id: patterns.max_pattern_id(),
|
||||
exec: runtime::Exec::TeddyFat1Mask256(
|
||||
runtime::TeddyFat1Mask256 {
|
||||
mask1: runtime::Mask256::new(masks[0]),
|
||||
},
|
||||
),
|
||||
}),
|
||||
(2, false, _) => Some(Teddy {
|
||||
buckets: buckets,
|
||||
max_pattern_id: patterns.max_pattern_id(),
|
||||
exec: runtime::Exec::TeddySlim2Mask128(
|
||||
runtime::TeddySlim2Mask128 {
|
||||
mask1: runtime::Mask128::new(masks[0]),
|
||||
mask2: runtime::Mask128::new(masks[1]),
|
||||
},
|
||||
),
|
||||
}),
|
||||
(2, true, false) => Some(Teddy {
|
||||
buckets: buckets,
|
||||
max_pattern_id: patterns.max_pattern_id(),
|
||||
exec: runtime::Exec::TeddySlim2Mask256(
|
||||
runtime::TeddySlim2Mask256 {
|
||||
mask1: runtime::Mask256::new(masks[0]),
|
||||
mask2: runtime::Mask256::new(masks[1]),
|
||||
},
|
||||
),
|
||||
}),
|
||||
(2, true, true) => Some(Teddy {
|
||||
buckets: buckets,
|
||||
max_pattern_id: patterns.max_pattern_id(),
|
||||
exec: runtime::Exec::TeddyFat2Mask256(
|
||||
runtime::TeddyFat2Mask256 {
|
||||
mask1: runtime::Mask256::new(masks[0]),
|
||||
mask2: runtime::Mask256::new(masks[1]),
|
||||
},
|
||||
),
|
||||
}),
|
||||
(3, false, _) => Some(Teddy {
|
||||
buckets: buckets,
|
||||
max_pattern_id: patterns.max_pattern_id(),
|
||||
exec: runtime::Exec::TeddySlim3Mask128(
|
||||
runtime::TeddySlim3Mask128 {
|
||||
mask1: runtime::Mask128::new(masks[0]),
|
||||
mask2: runtime::Mask128::new(masks[1]),
|
||||
mask3: runtime::Mask128::new(masks[2]),
|
||||
},
|
||||
),
|
||||
}),
|
||||
(3, true, false) => Some(Teddy {
|
||||
buckets: buckets,
|
||||
max_pattern_id: patterns.max_pattern_id(),
|
||||
exec: runtime::Exec::TeddySlim3Mask256(
|
||||
runtime::TeddySlim3Mask256 {
|
||||
mask1: runtime::Mask256::new(masks[0]),
|
||||
mask2: runtime::Mask256::new(masks[1]),
|
||||
mask3: runtime::Mask256::new(masks[2]),
|
||||
},
|
||||
),
|
||||
}),
|
||||
(3, true, true) => Some(Teddy {
|
||||
buckets: buckets,
|
||||
max_pattern_id: patterns.max_pattern_id(),
|
||||
exec: runtime::Exec::TeddyFat3Mask256(
|
||||
runtime::TeddyFat3Mask256 {
|
||||
mask1: runtime::Mask256::new(masks[0]),
|
||||
mask2: runtime::Mask256::new(masks[1]),
|
||||
mask3: runtime::Mask256::new(masks[2]),
|
||||
},
|
||||
),
|
||||
}),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A compiler is in charge of allocating patterns into buckets and generating
/// the masks necessary for searching.
#[derive(Clone)]
struct Compiler<'p> {
    /// The patterns to compile.
    patterns: &'p Patterns,
    /// `buckets[i]` holds the IDs of every pattern assigned to bucket `i`.
    /// There are 8 buckets (slim Teddy) or 16 (fat Teddy).
    buckets: Vec<Vec<PatternID>>,
    /// One mask per prefix byte position; always between 1 and 3 masks.
    masks: Vec<Mask>,
}
|
||||
|
||||
impl<'p> Compiler<'p> {
    /// Create a new Teddy compiler for the given patterns. If `fat` is true,
    /// then 16 buckets will be used instead of 8.
    ///
    /// This panics if any of the patterns given are empty.
    fn new(patterns: &'p Patterns, fat: bool) -> Compiler<'p> {
        // Use one mask per prefix byte, capped at 3, and never more than the
        // shortest pattern's length (a mask position past a pattern's end
        // would have no byte to draw from).
        let mask_len = cmp::min(3, patterns.minimum_len());
        assert!(1 <= mask_len && mask_len <= 3);

        Compiler {
            patterns,
            buckets: vec![vec![]; if fat { 16 } else { 8 }],
            masks: vec![Mask::default(); mask_len],
        }
    }

    /// Compile the patterns in this compiler into buckets and masks.
    fn compile(&mut self) {
        // Maps a pattern's low-nybble prefix to the bucket chosen for it, so
        // that later patterns with the same prefix land in the same bucket.
        let mut lonibble_to_bucket: BTreeMap<Vec<u8>, usize> = BTreeMap::new();
        for (id, pattern) in self.patterns.iter() {
            // We try to be slightly clever in how we assign patterns into
            // buckets. Generally speaking, we want patterns with the same
            // prefix to be in the same bucket, since it minimizes the amount
            // of time we spend churning through buckets in the verification
            // step.
            //
            // So we could assign patterns with the same N-prefix (where N
            // is the size of the mask, which is one of {1, 2, 3}) to the
            // same bucket. However, case insensitive searches are fairly
            // common, so we'd for example, ideally want to treat `abc` and
            // `ABC` as if they shared the same prefix. ASCII has the nice
            // property that the lower 4 bits of A and a are the same, so we
            // therefore group patterns with the same low-nybble-N-prefix into
            // the same bucket.
            //
            // MOREOVER, this is actually necessary for correctness! In
            // particular, by grouping patterns with the same prefix into the
            // same bucket, we ensure that we preserve correct leftmost-first
            // and leftmost-longest match semantics. In addition to the fact
            // that `patterns.iter()` iterates in the correct order, this
            // guarantees that all possible ambiguous matches will occur in
            // the same bucket. The verification routine could be adjusted to
            // support correct leftmost match semantics regardless of bucket
            // allocation, but that results in a performance hit. It's much
            // nicer to be able to just stop as soon as a match is found.
            let lonybs = pattern.low_nybbles(self.masks.len());
            if let Some(&bucket) = lonibble_to_bucket.get(&lonybs) {
                self.buckets[bucket].push(id);
            } else {
                // N.B. We assign buckets in reverse because it shouldn't have
                // any influence on performance, but it does make it harder to
                // get leftmost match semantics accidentally correct.
                let bucket = (self.buckets.len() - 1)
                    - (id as usize % self.buckets.len());
                self.buckets[bucket].push(id);
                lonibble_to_bucket.insert(lonybs, bucket);
            }
        }
        // With the bucket assignment fixed, record each pattern's prefix
        // bytes in every mask position.
        for (bucket_index, bucket) in self.buckets.iter().enumerate() {
            for &pat_id in bucket {
                let pat = self.patterns.get(pat_id);
                for (i, mask) in self.masks.iter_mut().enumerate() {
                    // 8 buckets means slim Teddy; 16 means fat Teddy, whose
                    // masks are laid out differently (see Mask::add_fat).
                    if self.buckets.len() == 8 {
                        mask.add_slim(bucket_index as u8, pat.bytes()[i]);
                    } else {
                        mask.add_fat(bucket_index as u8, pat.bytes()[i]);
                    }
                }
            }
        }
    }
}
|
||||
|
||||
impl<'p> fmt::Debug for Compiler<'p> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let mut buckets = vec![vec![]; self.buckets.len()];
|
||||
for (i, bucket) in self.buckets.iter().enumerate() {
|
||||
for &patid in bucket {
|
||||
buckets[i].push(self.patterns.get(patid));
|
||||
}
|
||||
}
|
||||
f.debug_struct("Compiler")
|
||||
.field("buckets", &buckets)
|
||||
.field("masks", &self.masks)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// The low and high nybble masks used at search time.
///
/// Each mask is 32 bytes wide, although only the first 16 bytes are consumed
/// by the SSSE3 runtime. Byte `n` of a mask (with `n` interpreted as a
/// nybble value, 0-15) is an 8-bit bitset: bit `i` is set exactly when that
/// nybble occurs in bucket `i`.
///
/// At search time, each mask is the target of a shuffle whose indices come
/// from the haystack. AND-ing the low and high shuffle results again yields
/// 8-bit bitsets, but now bit `i` indicates that the corresponding *byte*
/// belongs to bucket `i`.
///
/// During compilation, masks are plain byte arrays; the runtime reinterprets
/// them as 128-bit or 256-bit vectors.
///
/// (See the README in this directory for more details.)
#[derive(Clone, Copy, Default)]
pub struct Mask {
    lo: [u8; 32],
    hi: [u8; 32],
}

impl Mask {
    /// Update this mask by adding the given byte to the given bucket. The
    /// given bucket must be in the range 0-7.
    ///
    /// This is for "slim" Teddy, where there are only 8 buckets.
    fn add_slim(&mut self, bucket: u8, byte: u8) {
        assert!(bucket < 8);

        let bit = 1 << bucket;
        let lo_nyb = (byte & 0xF) as usize;
        let hi_nyb = (byte >> 4) as usize;
        // When using 256-bit vectors, the bucket assignment must appear in
        // both the low and high 128-bit halves of the mask, since AVX2
        // shuffles operate per 128-bit lane rather than on the full vector.
        // This is what lets the runtime process 32 haystack bytes at a time.
        self.lo[lo_nyb] |= bit;
        self.lo[lo_nyb + 16] |= bit;
        self.hi[hi_nyb] |= bit;
        self.hi[hi_nyb + 16] |= bit;
    }

    /// Update this mask by adding the given byte to the given bucket. The
    /// given bucket must be in the range 0-15.
    ///
    /// This is for "fat" Teddy, where there are 16 buckets.
    fn add_fat(&mut self, bucket: u8, byte: u8) {
        assert!(bucket < 16);

        let bit = 1 << (bucket % 8);
        let lo_nyb = (byte & 0xF) as usize;
        let hi_nyb = (byte >> 4) as usize;
        // Fat Teddy only works with AVX2. The low 128 bits of the mask hold
        // buckets 0-7 and the high 128 bits hold buckets 8-15, so pick the
        // lane from the bucket number.
        let lane = if bucket < 8 { 0 } else { 16 };
        self.lo[lo_nyb + lane] |= bit;
        self.hi[hi_nyb + lane] |= bit;
    }

    /// Return the low 128 bits of the low-nybble mask.
    pub fn lo128(&self) -> [u8; 16] {
        let mut out = [0u8; 16];
        out.copy_from_slice(&self.lo[0..16]);
        out
    }

    /// Return the full low-nybble mask.
    pub fn lo256(&self) -> [u8; 32] {
        self.lo
    }

    /// Return the low 128 bits of the high-nybble mask.
    pub fn hi128(&self) -> [u8; 16] {
        let mut out = [0u8; 16];
        out.copy_from_slice(&self.hi[0..16]);
        out
    }

    /// Return the full high-nybble mask.
    pub fn hi256(&self) -> [u8; 32] {
        self.hi
    }
}

impl fmt::Debug for Mask {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Show each mask byte as "index: bitset" so bucket membership per
        // nybble is visible at a glance.
        let render = |bytes: &[u8; 32]| -> Vec<String> {
            bytes
                .iter()
                .enumerate()
                .map(|(i, &b)| format!("{:02}: {:08b}", i, b))
                .collect()
        };
        f.debug_struct("Mask")
            .field("lo", &render(&self.lo))
            .field("hi", &render(&self.hi))
            .finish()
    }
}
|
|
@ -0,0 +1,62 @@
|
|||
#[cfg(target_arch = "x86_64")]
|
||||
pub use packed::teddy::compile::Builder;
|
||||
#[cfg(not(target_arch = "x86_64"))]
|
||||
pub use packed::teddy::fallback::Builder;
|
||||
#[cfg(not(target_arch = "x86_64"))]
|
||||
pub use packed::teddy::fallback::Teddy;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub use packed::teddy::runtime::Teddy;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
mod compile;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
mod runtime;
|
||||
|
||||
#[cfg(not(target_arch = "x86_64"))]
mod fallback {
    //! A no-op stand-in for the Teddy implementation on targets other than
    //! x86_64. It exposes the same API surface as the real builder/matcher,
    //! but `build` always fails, so callers fall back to other strategies.

    use packed::pattern::Patterns;
    use Match;

    /// Mirrors the real Teddy `Builder` API, but never produces a matcher.
    #[derive(Clone, Debug, Default)]
    pub struct Builder(());

    impl Builder {
        /// Create a new (no-op) builder.
        pub fn new() -> Builder {
            Builder(())
        }

        /// Always returns `None`: Teddy is unavailable on this target.
        pub fn build(&self, _: &Patterns) -> Option<Teddy> {
            None
        }

        /// Accepted for API compatibility; has no effect here.
        pub fn fat(&mut self, _: Option<bool>) -> &mut Builder {
            self
        }

        /// Accepted for API compatibility; has no effect here.
        pub fn avx(&mut self, _: Option<bool>) -> &mut Builder {
            self
        }
    }

    /// A matcher type that can never be constructed (`Builder::build` always
    /// returns `None`); its methods exist only to satisfy the shared API.
    #[derive(Clone, Debug)]
    pub struct Teddy(());

    impl Teddy {
        /// Never finds a match (unreachable in practice).
        pub fn find_at(
            &self,
            _: &Patterns,
            _: &[u8],
            _: usize,
        ) -> Option<Match> {
            None
        }

        /// No minimum pattern length requirement.
        pub fn minimum_len(&self) -> usize {
            0
        }

        /// Uses no heap memory.
        pub fn heap_bytes(&self) -> usize {
            0
        }
    }
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,568 @@
|
|||
use std::collections::HashMap;
|
||||
use std::usize;
|
||||
|
||||
use packed::{Config, MatchKind};
|
||||
use Match;
|
||||
|
||||
/// A description of a single test against a multi-pattern searcher.
///
/// A single test may not necessarily pass on every configuration of a
/// searcher. The tests are categorized and grouped appropriately below.
#[derive(Clone, Debug, Eq, PartialEq)]
struct SearchTest {
    /// The name of this test, for debugging.
    name: &'static str,
    /// The patterns to search for.
    patterns: &'static [&'static str],
    /// The text to search.
    haystack: &'static str,
    /// Each match is a triple of (pattern_index, start, end), where
    /// pattern_index is an index into `patterns` and `start`/`end` are indices
    /// into `haystack`.
    matches: &'static [(usize, usize, usize)],
}

/// An owned variant of `SearchTest` whose haystack has been padded with
/// filler bytes and whose expected match offsets have been shifted to
/// compensate.
struct SearchTestOwned {
    /// The amount of padding applied on each padded side of the haystack.
    offset: usize,
    /// The name of this test, for debugging.
    name: String,
    /// The patterns to search for.
    patterns: Vec<String>,
    /// The (possibly padded) text to search.
    haystack: String,
    /// Expected (pattern_index, start, end) triples, shifted by the amount
    /// of prefix padding.
    matches: Vec<(usize, usize, usize)>,
}

impl SearchTest {
    /// Generate padded variations of this test: for every offset in 0..=260,
    /// one variant with prefix padding, one with suffix padding and one with
    /// both. Exercising many offsets shakes out alignment-sensitive bugs in
    /// the vectorized searchers.
    fn variations(&self) -> Vec<SearchTestOwned> {
        let mut tests = vec![];
        for i in 0..=260 {
            tests.push(self.offset_prefix(i));
            tests.push(self.offset_suffix(i));
            tests.push(self.offset_both(i));
        }
        tests
    }

    /// Build the owned test, padding the haystack with `off` filler bytes on
    /// the requested side(s). Expected matches are shifted by the amount of
    /// prefix padding only (suffix padding does not move offsets).
    fn padded(&self, off: usize, prefix: bool, suffix: bool) -> SearchTestOwned {
        let lead = if prefix { "Z".repeat(off) } else { String::new() };
        let trail = if suffix { "Z".repeat(off) } else { String::new() };
        let shift = lead.len();
        SearchTestOwned {
            offset: off,
            name: self.name.to_string(),
            patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
            haystack: format!("{}{}{}", lead, self.haystack, trail),
            matches: self
                .matches
                .iter()
                .map(|&(id, s, e)| (id, s + shift, e + shift))
                .collect(),
        }
    }

    /// Pad both ends of the haystack with `off` filler bytes.
    fn offset_both(&self, off: usize) -> SearchTestOwned {
        self.padded(off, true, true)
    }

    /// Pad only the start of the haystack with `off` filler bytes.
    fn offset_prefix(&self, off: usize) -> SearchTestOwned {
        self.padded(off, true, false)
    }

    /// Pad only the end of the haystack with `off` filler bytes.
    fn offset_suffix(&self, off: usize) -> SearchTestOwned {
        self.padded(off, false, true)
    }
}
|
||||
|
||||
/// Short-hand constructor for SearchTest. We use it a lot below.
///
/// `$name` becomes the test's name via `stringify!`; the remaining
/// arguments populate the corresponding `SearchTest` fields directly.
macro_rules! t {
    ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
        SearchTest {
            name: stringify!($name),
            patterns: $patterns,
            haystack: $haystack,
            matches: $matches,
        }
    };
}
|
||||
|
||||
/// A collection of test groups.
|
||||
type TestCollection = &'static [&'static [SearchTest]];
|
||||
|
||||
// Define several collections corresponding to the different type of match
|
||||
// semantics supported. These collections have some overlap, but each
|
||||
// collection should have some tests that no other collection has.
|
||||
|
||||
/// Tests for leftmost-first match semantics.
|
||||
const PACKED_LEFTMOST_FIRST: TestCollection =
|
||||
&[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY];
|
||||
|
||||
/// Tests for leftmost-longest match semantics.
|
||||
const PACKED_LEFTMOST_LONGEST: TestCollection =
|
||||
&[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY];
|
||||
|
||||
// Now define the individual tests that make up the collections above.
|
||||
|
||||
/// A collection of tests that should always pass regardless of match
/// semantics. That is, all combinations of leftmost-{first, longest}
/// should produce the same answer.
|
||||
const BASICS: &'static [SearchTest] = &[
|
||||
t!(basic001, &["a"], "", &[]),
|
||||
t!(basic010, &["a"], "a", &[(0, 0, 1)]),
|
||||
t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
|
||||
t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
|
||||
t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
|
||||
t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
|
||||
t!(basic060, &["a"], "bbb", &[]),
|
||||
t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
|
||||
t!(basic100, &["aa"], "", &[]),
|
||||
t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
|
||||
t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
|
||||
t!(basic130, &["aa"], "abbab", &[]),
|
||||
t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
|
||||
t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]),
|
||||
t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
|
||||
t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
|
||||
t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
|
||||
t!(basic300, &["a", "b"], "", &[]),
|
||||
t!(basic310, &["a", "b"], "z", &[]),
|
||||
t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
|
||||
t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
|
||||
t!(
|
||||
basic340,
|
||||
&["a", "b"],
|
||||
"abba",
|
||||
&[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
|
||||
),
|
||||
t!(
|
||||
basic350,
|
||||
&["b", "a"],
|
||||
"abba",
|
||||
&[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
|
||||
),
|
||||
t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
|
||||
t!(basic400, &["foo", "bar"], "", &[]),
|
||||
t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
|
||||
t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
|
||||
t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
|
||||
t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
|
||||
t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
|
||||
t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
|
||||
t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
|
||||
t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
|
||||
t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
|
||||
t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
|
||||
t!(
|
||||
basic720,
|
||||
&["yabcdef", "bcdeyabc", "abcdezghi"],
|
||||
"yabcdezghi",
|
||||
&[(2, 1, 10),]
|
||||
),
|
||||
t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
|
||||
t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
|
||||
t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
|
||||
t!(
|
||||
basic840,
|
||||
&["ab", "ba"],
|
||||
"abababa",
|
||||
&[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
|
||||
),
|
||||
t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
|
||||
];
|
||||
|
||||
/// Tests for leftmost match semantics. These should pass for both
|
||||
/// leftmost-first and leftmost-longest match kinds. Stated differently, among
|
||||
/// ambiguous matches, the longest match and the match that appeared first when
|
||||
/// constructing the automaton should always be the same.
|
||||
const LEFTMOST: &'static [SearchTest] = &[
|
||||
t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
|
||||
t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
|
||||
t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
|
||||
t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
|
||||
t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
|
||||
t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
|
||||
t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
|
||||
t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
|
||||
t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
|
||||
t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
|
||||
t!(
|
||||
leftmost360,
|
||||
&["abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(2, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
leftmost370,
|
||||
&["abcdefghi", "cde", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(3, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
leftmost380,
|
||||
&["abcdefghi", "hz", "abcdefgh", "a"],
|
||||
"abcdefghz",
|
||||
&[(2, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
leftmost390,
|
||||
&["b", "abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(3, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
leftmost400,
|
||||
&["h", "abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(3, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
leftmost410,
|
||||
&["z", "abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(3, 0, 8), (0, 8, 9),]
|
||||
),
|
||||
];
|
||||
|
||||
/// Tests for non-overlapping leftmost-first match semantics. These tests
|
||||
/// should generally be specific to leftmost-first, which means they should
|
||||
/// generally fail under leftmost-longest semantics.
|
||||
const LEFTMOST_FIRST: &'static [SearchTest] = &[
|
||||
t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
|
||||
t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
|
||||
t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
|
||||
t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
|
||||
t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
|
||||
t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
|
||||
t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
|
||||
t!(
|
||||
leftfirst310,
|
||||
&["abcd", "b", "bce", "ce"],
|
||||
"abce",
|
||||
&[(1, 1, 2), (3, 2, 4),]
|
||||
),
|
||||
t!(
|
||||
leftfirst320,
|
||||
&["a", "abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(0, 0, 1), (2, 7, 9),]
|
||||
),
|
||||
t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
|
||||
t!(
|
||||
leftfirst340,
|
||||
&["abcdef", "x", "x", "x", "x", "x", "x", "abcde"],
|
||||
"abcdef",
|
||||
&[(0, 0, 6)]
|
||||
),
|
||||
];
|
||||
|
||||
/// Tests for non-overlapping leftmost-longest match semantics. These tests
|
||||
/// should generally be specific to leftmost-longest, which means they should
|
||||
/// generally fail under leftmost-first semantics.
|
||||
const LEFTMOST_LONGEST: &'static [SearchTest] = &[
|
||||
t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
|
||||
t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
|
||||
t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
|
||||
t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
|
||||
t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
|
||||
t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
|
||||
t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
|
||||
t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
|
||||
t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
|
||||
t!(
|
||||
leftlong310,
|
||||
&["a", "abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(3, 0, 8),]
|
||||
),
|
||||
t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
|
||||
t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
|
||||
t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
|
||||
];
|
||||
|
||||
/// Regression tests that are applied to all combinations.
|
||||
///
|
||||
/// If regression tests are needed for specific match semantics, then add them
|
||||
/// to the appropriate group above.
|
||||
const REGRESSION: &'static [SearchTest] = &[
|
||||
t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
|
||||
t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
|
||||
t!(
|
||||
regression030,
|
||||
&["libcore/", "libstd/"],
|
||||
"libcore/char/methods.rs",
|
||||
&[(0, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
regression040,
|
||||
&["libstd/", "libcore/"],
|
||||
"libcore/char/methods.rs",
|
||||
&[(1, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
regression050,
|
||||
&["\x00\x00\x01", "\x00\x00\x00"],
|
||||
"\x00\x00\x00",
|
||||
&[(1, 0, 3),]
|
||||
),
|
||||
t!(
|
||||
regression060,
|
||||
&["\x00\x00\x00", "\x00\x00\x01"],
|
||||
"\x00\x00\x00",
|
||||
&[(0, 0, 3),]
|
||||
),
|
||||
];
|
||||
|
||||
const TEDDY: &'static [SearchTest] = &[
|
||||
t!(
|
||||
teddy010,
|
||||
&["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"],
|
||||
"abcdefghijk",
|
||||
&[
|
||||
(0, 0, 1),
|
||||
(1, 1, 2),
|
||||
(2, 2, 3),
|
||||
(3, 3, 4),
|
||||
(4, 4, 5),
|
||||
(5, 5, 6),
|
||||
(6, 6, 7),
|
||||
(7, 7, 8),
|
||||
(8, 8, 9),
|
||||
(9, 9, 10),
|
||||
(10, 10, 11)
|
||||
]
|
||||
),
|
||||
t!(
|
||||
teddy020,
|
||||
&["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"],
|
||||
"abcdefghijk",
|
||||
&[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),]
|
||||
),
|
||||
t!(
|
||||
teddy030,
|
||||
&["abc"],
|
||||
"abcdefghijklmnopqrstuvwxyzabcdefghijk",
|
||||
&[(0, 0, 3), (0, 26, 29)]
|
||||
),
|
||||
];
|
||||
|
||||
// Now define a test for each combination of things above that we want to run.
|
||||
// Since there are a few different combinations for each collection of tests,
|
||||
// we define a couple of macros to avoid repetition drudgery. The testconfig
|
||||
// macro constructs the automaton from a given match kind, and runs the search
|
||||
// tests one-by-one over the given collection. The `with` parameter allows one
|
||||
// to configure the config with additional parameters. The testcombo macro
|
||||
// invokes testconfig in precisely this way: it sets up several tests where
|
||||
// each one turns a different knob on Config.
|
||||
|
||||
/// Define a `#[test]` named `$name` that runs every test in `$collection`
/// against a searcher built from a `Config` customized by the `$with`
/// closure.
macro_rules! testconfig {
    ($name:ident, $collection:expr, $with:expr) => {
        #[test]
        fn $name() {
            run_search_tests($collection, |test| {
                // Start from the default config and let the caller tweak it
                // (e.g., forcing Teddy/Rabin-Karp or a particular vector
                // width) before building the searcher.
                let mut config = Config::new();
                $with(&mut config);
                config
                    .builder()
                    .extend(test.patterns.iter().map(|p| p.as_bytes()))
                    .build()
                    .unwrap()
                    .find_iter(&test.haystack)
                    .collect()
            });
        }
    };
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
testconfig!(
|
||||
search_default_leftmost_first,
|
||||
PACKED_LEFTMOST_FIRST,
|
||||
|_: &mut Config| {}
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
testconfig!(
|
||||
search_default_leftmost_longest,
|
||||
PACKED_LEFTMOST_LONGEST,
|
||||
|c: &mut Config| {
|
||||
c.match_kind(MatchKind::LeftmostLongest);
|
||||
}
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
testconfig!(
|
||||
search_teddy_leftmost_first,
|
||||
PACKED_LEFTMOST_FIRST,
|
||||
|c: &mut Config| {
|
||||
c.force_teddy(true);
|
||||
}
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
testconfig!(
|
||||
search_teddy_leftmost_longest,
|
||||
PACKED_LEFTMOST_LONGEST,
|
||||
|c: &mut Config| {
|
||||
c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
|
||||
}
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
testconfig!(
|
||||
search_teddy_ssse3_leftmost_first,
|
||||
PACKED_LEFTMOST_FIRST,
|
||||
|c: &mut Config| {
|
||||
c.force_teddy(true);
|
||||
if is_x86_feature_detected!("ssse3") {
|
||||
c.force_avx(Some(false));
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
testconfig!(
|
||||
search_teddy_ssse3_leftmost_longest,
|
||||
PACKED_LEFTMOST_LONGEST,
|
||||
|c: &mut Config| {
|
||||
c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
|
||||
if is_x86_feature_detected!("ssse3") {
|
||||
c.force_avx(Some(false));
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
testconfig!(
|
||||
search_teddy_avx2_leftmost_first,
|
||||
PACKED_LEFTMOST_FIRST,
|
||||
|c: &mut Config| {
|
||||
c.force_teddy(true);
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
c.force_avx(Some(true));
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
testconfig!(
|
||||
search_teddy_avx2_leftmost_longest,
|
||||
PACKED_LEFTMOST_LONGEST,
|
||||
|c: &mut Config| {
|
||||
c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
c.force_avx(Some(true));
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
testconfig!(
|
||||
search_teddy_fat_leftmost_first,
|
||||
PACKED_LEFTMOST_FIRST,
|
||||
|c: &mut Config| {
|
||||
c.force_teddy(true);
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
c.force_teddy_fat(Some(true));
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
testconfig!(
|
||||
search_teddy_fat_leftmost_longest,
|
||||
PACKED_LEFTMOST_LONGEST,
|
||||
|c: &mut Config| {
|
||||
c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
c.force_teddy_fat(Some(true));
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
testconfig!(
|
||||
search_rabinkarp_leftmost_first,
|
||||
PACKED_LEFTMOST_FIRST,
|
||||
|c: &mut Config| {
|
||||
c.force_rabin_karp(true);
|
||||
}
|
||||
);
|
||||
|
||||
testconfig!(
|
||||
search_rabinkarp_leftmost_longest,
|
||||
PACKED_LEFTMOST_LONGEST,
|
||||
|c: &mut Config| {
|
||||
c.force_rabin_karp(true).match_kind(MatchKind::LeftmostLongest);
|
||||
}
|
||||
);
|
||||
|
||||
#[test]
fn search_tests_have_unique_names() {
    // For one collection, record the position at which each test name first
    // appears and panic on any repeat.
    let assert = |constname, tests: &[SearchTest]| {
        let mut seen = HashMap::new();
        for (i, test) in tests.iter().enumerate() {
            match seen.get(test.name) {
                None => {
                    seen.insert(test.name, i);
                }
                Some(&last) => {
                    panic!(
                        "{} tests have duplicate names at positions {} and {}",
                        constname, last, i
                    );
                }
            }
        }
    };
    assert("BASICS", BASICS);
    assert("LEFTMOST", LEFTMOST);
    assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
    assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
    assert("REGRESSION", REGRESSION);
    assert("TEDDY", TEDDY);
}
|
||||
|
||||
fn run_search_tests<F: FnMut(&SearchTestOwned) -> Vec<Match>>(
|
||||
which: TestCollection,
|
||||
mut f: F,
|
||||
) {
|
||||
let get_match_triples =
|
||||
|matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
|
||||
matches
|
||||
.into_iter()
|
||||
.map(|m| (m.pattern(), m.start(), m.end()))
|
||||
.collect()
|
||||
};
|
||||
for &tests in which {
|
||||
for spec in tests {
|
||||
for test in spec.variations() {
|
||||
assert_eq!(
|
||||
test.matches,
|
||||
get_match_triples(f(&test)).as_slice(),
|
||||
"test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}",
|
||||
test.name,
|
||||
test.patterns,
|
||||
test.haystack,
|
||||
test.offset,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,181 @@
|
|||
// This file contains a set of fairly generic utility functions when working
|
||||
// with SIMD vectors.
|
||||
//
|
||||
// SAFETY: All of the routines below are unsafe to call because they assume
|
||||
// the necessary CPU target features in order to use particular vendor
|
||||
// intrinsics. Calling these routines when the underlying CPU does not support
|
||||
// the appropriate target features is NOT safe. Callers must ensure this
|
||||
// themselves.
|
||||
//
|
||||
// Note that it may not look like this safety invariant is being upheld when
|
||||
// these routines are called. Namely, the CPU feature check is typically pretty
|
||||
// far away from when these routines are used. Instead, we rely on the fact
|
||||
// that certain types serve as a guaranteed receipt that pertinent target
|
||||
// features are enabled. For example, the only way TeddySlim3Mask256 can be
|
||||
// constructed is if the AVX2 CPU feature is available. Thus, any code running
|
||||
// inside of TeddySlim3Mask256 can use any of the functions below without any
|
||||
// additional checks: its very existence *is* the check.
|
||||
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
/// Shift `a` to the left by two bytes (removing its two most significant
/// bytes), and concatenate it with the two most significant bytes of `b`.
///
/// Callers must ensure AVX2 is available (see the module comment on safety).
#[target_feature(enable = "avx2")]
pub unsafe fn alignr256_14(a: __m256i, b: __m256i) -> __m256i {
    // Credit goes to jneem for figuring this out:
    // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184
    //
    // TL;DR avx2's PALIGNR instruction is actually just two 128-bit PALIGNR
    // instructions, which is not what we want, so we need to do some extra
    // shuffling.

    // This permute gives us the low 16 bytes of a concatenated with the high
    // 16 bytes of b, in order of most significant to least significant. So
    // `v = a[15:0] b[31:16]`.
    let v = _mm256_permute2x128_si256(b, a, 0x21);
    // This effectively does this (where we deal in terms of byte-indexing
    // and byte-shifting, and use inclusive ranges):
    //
    //   ret[15:0]  := ((a[15:0] << 16) | v[15:0]) >> 14
    //              =  ((a[15:0] << 16) | b[31:16]) >> 14
    //   ret[31:16] := ((a[31:16] << 16) | v[31:16]) >> 14
    //              =  ((a[31:16] << 16) | a[15:0]) >> 14
    //
    // Which therefore results in:
    //
    //   ret[31:0]  := a[29:16] a[15:14] a[13:0] b[31:30]
    //
    // The end result is that we've effectively done this:
    //
    //   (a << 2) | (b >> 30)
    //
    // When `A` and `B` are strings---where the beginning of the string is in
    // the least significant bits---we effectively result in the following
    // semantic operation:
    //
    //   (A >> 2) | (B << 30)
    //
    // The reversal being attributed to the fact that we are in little-endian.
    _mm256_alignr_epi8(a, v, 14)
}
|
||||
|
||||
/// Shift `a` to the left by one byte (removing its most significant byte),
/// and concatenate it with the most significant byte of `b`.
///
/// Callers must ensure AVX2 is available (see the module comment on safety).
#[target_feature(enable = "avx2")]
pub unsafe fn alignr256_15(a: __m256i, b: __m256i) -> __m256i {
    // For a full explanation of the permute+alignr combination, see
    // alignr256_14. This is the same trick with a shift of 15 instead of 14.
    let v = _mm256_permute2x128_si256(b, a, 0x21);
    _mm256_alignr_epi8(a, v, 15)
}
|
||||
|
||||
/// Unpack the given 128-bit vector into its 64-bit components. The first
/// element of the array returned corresponds to the least significant 64-bit
/// lane in `a`.
// NOTE(review): only SSE2 intrinsics appear below, so `ssse3` is a broader
// requirement than strictly necessary --- confirm whether this is deliberate.
#[target_feature(enable = "ssse3")]
pub unsafe fn unpack64x128(a: __m128i) -> [u64; 2] {
    [
        // Low lane: extract the low 64 bits directly.
        _mm_cvtsi128_si64(a) as u64,
        // High lane: shift it down into the low position first.
        _mm_cvtsi128_si64(_mm_srli_si128(a, 8)) as u64,
    ]
}
|
||||
|
||||
/// Unpack the given 256-bit vector into its 64-bit components. The first
/// element of the array returned corresponds to the least significant 64-bit
/// lane in `a`.
///
/// Callers must ensure AVX2 is available (see the module comment on safety).
#[target_feature(enable = "avx2")]
pub unsafe fn unpack64x256(a: __m256i) -> [u64; 4] {
    // Using transmute here is precisely equivalent, but actually slower. It's
    // not quite clear why.
    let lo = _mm256_extracti128_si256(a, 0);
    let hi = _mm256_extracti128_si256(a, 1);
    [
        _mm_cvtsi128_si64(lo) as u64,
        _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64,
        _mm_cvtsi128_si64(hi) as u64,
        _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64,
    ]
}
|
||||
|
||||
/// Unpack the low 128-bits of `a` and `b`, and return them as 4 64-bit
/// integers.
///
/// More precisely, if a = a4 a3 a2 a1 and b = b4 b3 b2 b1, where each element
/// is a 64-bit integer and a1/b1 correspond to the least significant 64 bits,
/// then the return value is `b2 b1 a2 a1`.
#[target_feature(enable = "avx2")]
pub unsafe fn unpacklo64x256(a: __m256i, b: __m256i) -> [u64; 4] {
    // The casts are no-ops that reinterpret the low 128 bits of each vector.
    let lo = _mm256_castsi256_si128(a);
    let hi = _mm256_castsi256_si128(b);
    [
        _mm_cvtsi128_si64(lo) as u64,
        _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64,
        _mm_cvtsi128_si64(hi) as u64,
        _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64,
    ]
}
|
||||
|
||||
/// Returns true if and only if all bits in the given 128-bit vector are 0.
#[target_feature(enable = "ssse3")]
pub unsafe fn is_all_zeroes128(a: __m128i) -> bool {
    // Byte-wise compare with zero: equal bytes become 0xFF. movemask gathers
    // the high bit of each of the 16 bytes, so an all-zero input yields
    // exactly 0xFFFF.
    let cmp = _mm_cmpeq_epi8(a, zeroes128());
    _mm_movemask_epi8(cmp) as u32 == 0xFFFF
}
|
||||
|
||||
/// Returns true if and only if all bits in the given 256-bit vector are 0.
#[target_feature(enable = "avx2")]
pub unsafe fn is_all_zeroes256(a: __m256i) -> bool {
    // Same technique as is_all_zeroes128, but with 32 byte lanes, so the
    // all-zero mask is 0xFFFFFFFF.
    let cmp = _mm256_cmpeq_epi8(a, zeroes256());
    _mm256_movemask_epi8(cmp) as u32 == 0xFFFFFFFF
}
|
||||
|
||||
/// Load a 128-bit vector from slice at the given position. The slice does
/// not need to be aligned.
///
/// Since this code assumes little-endian (there is no big-endian x86), the
/// bytes starting in `slice[at..]` will be at the least significant bits of
/// the returned vector. This is important for the surrounding code, since for
/// example, shifting the resulting vector right is equivalent to logically
/// shifting the bytes in `slice` left.
#[target_feature(enable = "sse2")]
pub unsafe fn loadu128(slice: &[u8], at: usize) -> __m128i {
    // SAFETY: callers must ensure that at least 16 bytes are readable
    // starting at `slice[at]`; `get_unchecked` elides the bounds check.
    let ptr = slice.get_unchecked(at..).as_ptr();
    _mm_loadu_si128(ptr as *const u8 as *const __m128i)
}
|
||||
|
||||
/// Load a 256-bit vector from slice at the given position. The slice does
/// not need to be aligned.
///
/// Since this code assumes little-endian (there is no big-endian x86), the
/// bytes starting in `slice[at..]` will be at the least significant bits of
/// the returned vector. This is important for the surrounding code, since for
/// example, shifting the resulting vector right is equivalent to logically
/// shifting the bytes in `slice` left.
#[target_feature(enable = "avx2")]
pub unsafe fn loadu256(slice: &[u8], at: usize) -> __m256i {
    // SAFETY: callers must ensure that at least 32 bytes are readable
    // starting at `slice[at]`; `get_unchecked` elides the bounds check.
    let ptr = slice.get_unchecked(at..).as_ptr();
    _mm256_loadu_si256(ptr as *const u8 as *const __m256i)
}
|
||||
|
||||
/// Returns a 128-bit vector with all bits set to 0.
#[target_feature(enable = "sse2")]
pub unsafe fn zeroes128() -> __m128i {
    _mm_set1_epi8(0)
}

/// Returns a 256-bit vector with all bits set to 0.
#[target_feature(enable = "avx2")]
pub unsafe fn zeroes256() -> __m256i {
    _mm256_set1_epi8(0)
}

/// Returns a 128-bit vector with all bits set to 1.
#[target_feature(enable = "sse2")]
pub unsafe fn ones128() -> __m128i {
    // Broadcasting 0xFF to every byte sets every bit.
    _mm_set1_epi8(0xFF as u8 as i8)
}

/// Returns a 256-bit vector with all bits set to 1.
#[target_feature(enable = "avx2")]
pub unsafe fn ones256() -> __m256i {
    _mm256_set1_epi8(0xFF as u8 as i8)
}
|
|
@ -0,0 +1,955 @@
|
|||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::panic::{RefUnwindSafe, UnwindSafe};
|
||||
use std::u8;
|
||||
|
||||
use memchr::{memchr, memchr2, memchr3};
|
||||
|
||||
use ahocorasick::MatchKind;
|
||||
use packed;
|
||||
use Match;
|
||||
|
||||
/// A candidate is the result of running a prefilter on a haystack at a
/// particular position. The result is either no match, a confirmed match or
/// a possible match.
///
/// When no match is returned, the prefilter is guaranteeing that no possible
/// match can be found in the haystack, and the caller may trust this. That is,
/// all correct prefilters must never report false negatives.
///
/// In some cases, a prefilter can confirm a match very quickly, in which case,
/// the caller may use this to stop what it's doing and report the match. In
/// this case, prefilter implementations must never report a false positive.
/// In other cases, the prefilter can only report a potential match, in which
/// case the callers must attempt to confirm the match. In this case, prefilter
/// implementations are permitted to return false positives.
#[derive(Clone, Debug)]
pub enum Candidate {
    /// No match is possible at or after the searched position.
    None,
    /// A confirmed match; never a false positive.
    Match(Match),
    /// A possible match starting at the given offset; must be confirmed by
    /// the caller.
    PossibleStartOfMatch(usize),
}

impl Candidate {
    /// Convert this candidate into an option. This is useful when callers
    /// do not distinguish between true positives and false positives (i.e.,
    /// the caller must always confirm the match in order to update some other
    /// state).
    ///
    /// Returns the starting offset of the (possible) match, or `None` when
    /// no match is possible.
    pub fn into_option(self) -> Option<usize> {
        match self {
            Candidate::None => None,
            Candidate::Match(ref m) => Some(m.start()),
            Candidate::PossibleStartOfMatch(start) => Some(start),
        }
    }
}
|
||||
|
||||
/// A prefilter describes the behavior of fast literal scanners for quickly
/// skipping past bytes in the haystack that we know cannot possibly
/// participate in a match.
pub trait Prefilter:
    Send + Sync + RefUnwindSafe + UnwindSafe + fmt::Debug
{
    /// Returns the next possible match candidate. This may yield false
    /// positives, so callers must confirm a match starting at the position
    /// returned. This, however, must never produce false negatives. That is,
    /// this must, at minimum, return the starting position of the next match
    /// in the given haystack after or at the given position.
    fn next_candidate(
        &self,
        state: &mut PrefilterState,
        haystack: &[u8],
        at: usize,
    ) -> Candidate;

    /// A method for cloning a prefilter, to work-around the fact that Clone
    /// is not object-safe.
    fn clone_prefilter(&self) -> Box<dyn Prefilter>;

    /// Returns the approximate total amount of heap used by this prefilter, in
    /// units of bytes.
    fn heap_bytes(&self) -> usize;

    /// Returns true if and only if this prefilter may return false
    /// positives. When this returns false, every `Candidate::Match` reported
    /// is a confirmed match, which is useful for completely avoiding the
    /// automaton when the prefilter can quickly confirm its own matches.
    ///
    /// By default, this returns true, which is conservative; it is always
    /// correct to return `true`. Returning `false` here and reporting a false
    /// positive will result in incorrect searches.
    fn reports_false_positives(&self) -> bool {
        true
    }
}
|
||||
|
||||
// Forwarding impl: allows a borrowed prefilter to be used anywhere a
// `Prefilter` is expected. Every method simply delegates to `**self`.
impl<'a, P: Prefilter + ?Sized> Prefilter for &'a P {
    #[inline]
    fn next_candidate(
        &self,
        state: &mut PrefilterState,
        haystack: &[u8],
        at: usize,
    ) -> Candidate {
        (**self).next_candidate(state, haystack, at)
    }

    fn clone_prefilter(&self) -> Box<dyn Prefilter> {
        (**self).clone_prefilter()
    }

    fn heap_bytes(&self) -> usize {
        (**self).heap_bytes()
    }

    fn reports_false_positives(&self) -> bool {
        (**self).reports_false_positives()
    }
}
|
||||
|
||||
/// A convenience object for representing any type that implements Prefilter
/// and is cloneable.
#[derive(Debug)]
pub struct PrefilterObj(Box<dyn Prefilter>);

impl Clone for PrefilterObj {
    fn clone(&self) -> Self {
        // Clone is implemented by hand because `Clone` is not object-safe;
        // the trait's `clone_prefilter` method is the work-around.
        PrefilterObj(self.0.clone_prefilter())
    }
}

impl PrefilterObj {
    /// Create a new prefilter object.
    pub fn new<T: Prefilter + 'static>(t: T) -> PrefilterObj {
        PrefilterObj(Box::new(t))
    }

    /// Return the underlying prefilter trait object.
    pub fn as_ref(&self) -> &dyn Prefilter {
        &*self.0
    }
}
|
||||
|
||||
/// PrefilterState tracks state associated with the effectiveness of a
/// prefilter. It records how many bytes, on average, are skipped by the
/// prefilter. If this average dips below a certain threshold over time, then
/// the state renders the prefilter inert and stops using it.
///
/// A prefilter state should be created for each search. (Where creating an
/// iterator via, e.g., `find_iter`, is treated as a single search.)
#[derive(Clone, Debug)]
pub struct PrefilterState {
    /// The number of skips that has been executed.
    skips: usize,
    /// The total number of bytes that have been skipped.
    skipped: usize,
    /// The maximum length of a match. Used to scale the "bytes skipped"
    /// threshold that decides whether the prefilter is paying for itself.
    max_match_len: usize,
    /// Once this heuristic has been deemed permanently ineffective, it will
    /// be inert throughout the rest of its lifetime. This serves as a cheap
    /// way to check inertness.
    inert: bool,
    /// The last (absolute) position that a prefilter scanned to. Prefilters
    /// can use this position to decide whether to re-scan or not.
    ///
    /// Unlike the other fields, this is a fleeting condition: the prefilter
    /// is considered ineffective while the search sits before `last_scan_at`,
    /// but becomes effective again once the search moves past it.
    ///
    /// This both avoids extra prefilter calls and guards against quadratic
    /// behavior, since any particular byte is scanned at most once. (Some
    /// prefilters, like the start-byte prefilter, never need this field,
    /// since they only look for starting bytes.)
    last_scan_at: usize,
}

impl PrefilterState {
    /// The minimum number of skip attempts to try before considering whether
    /// a prefilter is effective or not.
    const MIN_SKIPS: usize = 40;

    /// The minimum amount of bytes that skipping must average, expressed as
    /// a multiple of the maximum possible match length.
    ///
    /// That is, once MIN_SKIPS skips have occurred, if the average number of
    /// bytes skipped ever falls below MIN_AVG_FACTOR * max-match-length, then
    /// the prefilter ought to be rendered inert.
    const MIN_AVG_FACTOR: usize = 2;

    /// Create a fresh prefilter state.
    pub fn new(max_match_len: usize) -> PrefilterState {
        PrefilterState {
            skips: 0,
            skipped: 0,
            max_match_len,
            inert: false,
            last_scan_at: 0,
        }
    }

    /// Update this state with the number of bytes skipped on the last
    /// invocation of the prefilter.
    #[inline]
    fn update_skipped_bytes(&mut self, skipped: usize) {
        self.skips += 1;
        self.skipped += skipped;
    }

    /// Updates the position at which the last scan stopped. This may be
    /// greater than the position of the last candidate reported. For example,
    /// searching for the "rare" byte `z` in `abczdef` for the pattern `abcz`
    /// will report a candidate at position `0`, but the end of its last scan
    /// will be at position `3`.
    ///
    /// The prefilter should not be re-run until the search moves past this
    /// position.
    #[inline]
    fn update_at(&mut self, at: usize) {
        // Only ever moves forward; positions behind the current high-water
        // mark are ignored.
        self.last_scan_at = cmp::max(self.last_scan_at, at);
    }

    /// Return true if and only if this state indicates that a prefilter is
    /// still effective.
    ///
    /// The given position should correspond to the current starting position
    /// of the search.
    #[inline]
    pub fn is_effective(&mut self, at: usize) -> bool {
        // Permanently disabled, or positioned before the end of the last
        // scan: do not run the prefilter here.
        if self.inert || at < self.last_scan_at {
            return false;
        }
        // Not enough samples yet to judge; assume effectiveness.
        if self.skips < PrefilterState::MIN_SKIPS {
            return true;
        }
        // Equivalent to `skipped / skips >= MIN_AVG_FACTOR * max_match_len`,
        // written without the division.
        let min_avg = PrefilterState::MIN_AVG_FACTOR * self.max_match_len;
        if self.skipped >= min_avg * self.skips {
            return true;
        }
        // The average fell below the threshold: disable permanently.
        self.inert = true;
        false
    }
}
|
||||
|
||||
/// A builder for constructing the best possible prefilter. When constructed,
/// this builder will heuristically select the best prefilter it can build,
/// if any, and discard the rest.
#[derive(Debug)]
pub struct Builder {
    /// The number of patterns added via `add`.
    count: usize,
    /// Whether added patterns should be interpreted without respect to
    /// ASCII case.
    ascii_case_insensitive: bool,
    /// Candidate prefilter that scans for distinct pattern starting bytes.
    start_bytes: StartBytesBuilder,
    /// Candidate prefilter that scans for "rare" bytes within the patterns.
    rare_bytes: RareBytesBuilder,
    /// Candidate packed searcher; only present when the match kind has a
    /// packed equivalent.
    packed: Option<packed::Builder>,
}
|
||||
|
||||
impl Builder {
    /// Create a new builder for constructing the best possible prefilter.
    pub fn new(kind: MatchKind) -> Builder {
        // A packed builder is only available when the requested match kind
        // has a packed equivalent.
        let pbuilder = kind
            .as_packed()
            .map(|kind| packed::Config::new().match_kind(kind).builder());
        Builder {
            count: 0,
            ascii_case_insensitive: false,
            start_bytes: StartBytesBuilder::new(),
            rare_bytes: RareBytesBuilder::new(),
            packed: pbuilder,
        }
    }

    /// Enable ASCII case insensitivity. When set, byte strings added to this
    /// builder will be interpreted without respect to ASCII case.
    pub fn ascii_case_insensitive(mut self, yes: bool) -> Builder {
        // Propagate the setting to both byte-based sub-builders.
        self.ascii_case_insensitive = yes;
        self.start_bytes = self.start_bytes.ascii_case_insensitive(yes);
        self.rare_bytes = self.rare_bytes.ascii_case_insensitive(yes);
        self
    }

    /// Return a prefilter suitable for quickly finding potential matches.
    ///
    /// All patterns added to an Aho-Corasick automaton should be added to this
    /// builder before attempting to construct the prefilter.
    pub fn build(&self) -> Option<PrefilterObj> {
        match (self.start_bytes.build(), self.rare_bytes.build()) {
            // If we could build both start and rare prefilters, then there are
            // a few cases in which we'd want to use the start-byte prefilter
            // over the rare-byte prefilter, since the former has lower
            // overhead.
            (prestart @ Some(_), prerare @ Some(_)) => {
                // If the start-byte prefilter can scan for a smaller number
                // of bytes than the rare-byte prefilter, then it's probably
                // faster.
                let has_fewer_bytes =
                    self.start_bytes.count < self.rare_bytes.count;
                // Otherwise, if the combined frequency rank of the detected
                // bytes in the start-byte prefilter is "close" to the combined
                // frequency rank of the rare-byte prefilter, then we pick
                // the start-byte prefilter even if the rare-byte prefilter
                // heuristically searches for rare bytes. This is because the
                // rare-byte prefilter has higher constant costs, so we tend to
                // prefer the start-byte prefilter when we can.
                let has_rarer_bytes =
                    self.start_bytes.rank_sum <= self.rare_bytes.rank_sum + 50;
                if has_fewer_bytes || has_rarer_bytes {
                    prestart
                } else {
                    prerare
                }
            }
            (prestart @ Some(_), None) => prestart,
            (None, prerare @ Some(_)) => prerare,
            // NOTE(review): when ASCII case insensitivity is enabled and
            // neither byte prefilter is available, the packed fallback is
            // skipped entirely --- presumably because the packed searcher
            // does not handle case insensitivity; confirm.
            (None, None) if self.ascii_case_insensitive => None,
            (None, None) => self
                .packed
                .as_ref()
                .and_then(|b| b.build())
                .map(|s| PrefilterObj::new(Packed(s))),
        }
    }

    /// Add a literal string to this prefilter builder.
    pub fn add(&mut self, bytes: &[u8]) {
        // Feed the pattern to every candidate sub-builder.
        self.count += 1;
        self.start_bytes.add(bytes);
        self.rare_bytes.add(bytes);
        if let Some(ref mut pbuilder) = self.packed {
            pbuilder.add(bytes);
        }
    }
}
|
||||
|
||||
/// A type that wraps a packed searcher and implements the `Prefilter`
/// interface.
#[derive(Clone, Debug)]
struct Packed(packed::Searcher);

impl Prefilter for Packed {
    fn next_candidate(
        &self,
        _state: &mut PrefilterState,
        haystack: &[u8],
        at: usize,
    ) -> Candidate {
        // The packed searcher finds complete matches, so a hit is reported
        // as a confirmed `Candidate::Match` rather than a possible start.
        self.0.find_at(haystack, at).map_or(Candidate::None, Candidate::Match)
    }

    fn clone_prefilter(&self) -> Box<dyn Prefilter> {
        Box::new(self.clone())
    }

    fn heap_bytes(&self) -> usize {
        self.0.heap_bytes()
    }

    fn reports_false_positives(&self) -> bool {
        // Matches reported above are confirmed, so callers may skip running
        // the automaton when this prefilter reports a match.
        false
    }
}
|
||||
|
||||
/// A builder for constructing a rare byte prefilter.
///
/// A rare byte prefilter attempts to pick out a small set of rare bytes that
/// occur in the patterns, and then quickly scan to matches of those rare
/// bytes.
#[derive(Clone, Debug)]
struct RareBytesBuilder {
    /// Whether this prefilter should account for ASCII case insensitivity or
    /// not.
    ascii_case_insensitive: bool,
    /// A set of byte offsets associated with detected rare bytes. An entry is
    /// only set if a rare byte is detected in a pattern.
    byte_offsets: RareByteOffsets,
    /// Whether this is available as a prefilter or not. This can be set to
    /// false during construction if a condition is seen that invalidates the
    /// use of the rare-byte prefilter.
    available: bool,
    /// The number of bytes set to an active value in `byte_offsets`.
    count: usize,
    /// The sum of frequency ranks for the rare bytes detected. This is
    /// intended to give a heuristic notion of how rare the bytes are.
    rank_sum: u16,
}
|
||||
|
||||
/// A set of rare byte offsets, keyed by byte.
#[derive(Clone, Copy)]
struct RareByteOffsets {
    /// One slot per possible byte value. When an item in this set has an
    /// offset of u8::MAX (255), then it is considered unset.
    set: [RareByteOffset; 256],
}

impl RareByteOffsets {
    /// Create a new empty set of rare byte offsets.
    pub fn empty() -> RareByteOffsets {
        RareByteOffsets { set: [RareByteOffset::default(); 256] }
    }

    /// Add the given offset for the given byte to this set. If the offset is
    /// greater than the existing offset, then it overwrites the previous
    /// value and returns false. If there is no previous value set, then this
    /// sets it and returns true.
    ///
    /// The given offset must be active, otherwise this panics.
    pub fn apply(&mut self, byte: u8, off: RareByteOffset) -> bool {
        assert!(off.is_active());

        let existing = &mut self.set[byte as usize];
        if !existing.is_active() {
            *existing = off;
            true
        } else {
            // Keep the maximum offset seen for this byte, so that candidate
            // positions are shifted back far enough for every pattern.
            if existing.max < off.max {
                *existing = off;
            }
            false
        }
    }
}

impl fmt::Debug for RareByteOffsets {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Only show active entries; printing all 256 slots would be noise.
        let mut offsets = vec![];
        for off in self.set.iter() {
            if off.is_active() {
                offsets.push(off);
            }
        }
        f.debug_struct("RareByteOffsets").field("set", &offsets).finish()
    }
}
|
||||
|
||||
/// The offset information recorded for a single occurrence of a "rare" byte
/// across the patterns used to construct one Aho-Corasick automaton.
#[derive(Clone, Copy, Debug)]
struct RareByteOffset {
    /// The largest distance from the start of any pattern at which this byte
    /// appears. It acts as a shift amount: when the prefilter finds this
    /// byte at haystack position `p`, it reports `p - max` as the candidate,
    /// which guarantees the automaton starts early enough to observe any
    /// match containing that occurrence.
    ///
    /// To avoid accidentally quadratic behavior, a prefilter is considered
    /// ineffective when it is asked to start scanning from a position that
    /// it has already scanned past.
    ///
    /// Values are capped at 254; 255 is a sentinel meaning "unused". If a
    /// rare byte would require an offset of 255 or more, the rare-byte
    /// prefilter is disabled entirely for simplicity.
    max: u8,
}

impl Default for RareByteOffset {
    fn default() -> RareByteOffset {
        // The default slot carries the inactive sentinel.
        RareByteOffset { max: u8::MAX }
    }
}

impl RareByteOffset {
    /// Create a new rare byte offset. Offsets too large to represent
    /// (>= 255) yield an inactive `RareByteOffset` instead.
    fn new(max: usize) -> RareByteOffset {
        if max <= (u8::MAX - 1) as usize {
            RareByteOffset { max: max as u8 }
        } else {
            RareByteOffset::default()
        }
    }

    /// Returns true if and only if this offset is active. Inactive offsets
    /// must not be used.
    fn is_active(&self) -> bool {
        self.max != u8::MAX
    }
}
|
||||
|
||||
impl RareBytesBuilder {
    /// Create a new builder for constructing a rare byte prefilter.
    fn new() -> RareBytesBuilder {
        RareBytesBuilder {
            ascii_case_insensitive: false,
            byte_offsets: RareByteOffsets::empty(),
            available: true,
            count: 0,
            rank_sum: 0,
        }
    }

    /// Enable ASCII case insensitivity. When set, byte strings added to this
    /// builder will be interpreted without respect to ASCII case.
    fn ascii_case_insensitive(mut self, yes: bool) -> RareBytesBuilder {
        self.ascii_case_insensitive = yes;
        self
    }

    /// Build the rare bytes prefilter.
    ///
    /// If more than 3 distinct rare bytes were found, or if heuristics
    /// otherwise determine that this prefilter should not be used, then `None`
    /// is returned.
    fn build(&self) -> Option<PrefilterObj> {
        if !self.available || self.count > 3 {
            return None;
        }
        // Collect the (at most 3) active bytes out of the offset table.
        let (mut bytes, mut len) = ([0; 3], 0);
        for b in 0..256 {
            if self.byte_offsets.set[b].is_active() {
                bytes[len] = b as u8;
                len += 1;
            }
        }
        // Dispatch to the scanner specialized for the number of rare bytes.
        match len {
            0 => None,
            1 => Some(PrefilterObj::new(RareBytesOne {
                byte1: bytes[0],
                offset: self.byte_offsets.set[bytes[0] as usize],
            })),
            2 => Some(PrefilterObj::new(RareBytesTwo {
                offsets: self.byte_offsets,
                byte1: bytes[0],
                byte2: bytes[1],
            })),
            3 => Some(PrefilterObj::new(RareBytesThree {
                offsets: self.byte_offsets,
                byte1: bytes[0],
                byte2: bytes[1],
                byte3: bytes[2],
            })),
            // `count <= 3` was verified above, so at most 3 slots are active.
            _ => unreachable!(),
        }
    }

    /// Add a byte string to this builder.
    ///
    /// All patterns added to an Aho-Corasick automaton should be added to this
    /// builder before attempting to construct the prefilter.
    fn add(&mut self, bytes: &[u8]) {
        // If we've already blown our budget, then don't waste time looking
        // for more rare bytes.
        if self.count > 3 {
            self.available = false;
            return;
        }
        // Seed the search with the first byte; empty patterns contribute
        // nothing.
        let mut rarest = match bytes.get(0) {
            None => return,
            Some(&b) => (b, 0, freq_rank(b)),
        };
        // The idea here is to look for the rarest byte in each pattern, and
        // add that to our set. As a special exception, if we see a byte that
        // we've already added, then we immediately stop and choose that byte,
        // even if there's another rare byte in the pattern. This helps us
        // apply the rare byte optimization in more cases by attempting to pick
        // bytes that are in common between patterns. So for example, if we
        // were searching for `Sherlock` and `lockjaw`, then this would pick
        // `k` for both patterns, resulting in the use of `memchr` instead of
        // `memchr2` for `k` and `j`.
        for (pos, &b) in bytes.iter().enumerate() {
            if self.byte_offsets.set[b as usize].is_active() {
                self.add_rare_byte(b, pos);
                return;
            }
            let rank = freq_rank(b);
            if rank < rarest.2 {
                rarest = (b, pos, rank);
            }
        }
        self.add_rare_byte(rarest.0, rarest.1);
    }

    /// Record `byte` (and, when ASCII case insensitivity is enabled, its
    /// opposite-case counterpart) as a rare byte occurring at offset `pos`.
    fn add_rare_byte(&mut self, byte: u8, pos: usize) {
        self.add_one_byte(byte, pos);
        if self.ascii_case_insensitive {
            self.add_one_byte(opposite_ascii_case(byte), pos);
        }
    }

    /// Record one byte at the given pattern offset, disabling this prefilter
    /// entirely when the offset is too large to represent.
    fn add_one_byte(&mut self, byte: u8, pos: usize) {
        let off = RareByteOffset::new(pos);
        if !off.is_active() {
            self.available = false;
            return;
        }
        // `apply` returns true only for bytes not previously recorded, so
        // `count` and `rank_sum` track distinct bytes.
        if self.byte_offsets.apply(byte, off) {
            self.count += 1;
            self.rank_sum += freq_rank(byte) as u16;
        }
    }
}
|
||||
|
||||
/// A prefilter for scanning for a single "rare" byte.
#[derive(Clone, Debug)]
struct RareBytesOne {
    /// The byte to search for.
    byte1: u8,
    /// The shift applied to a found position to produce a candidate start.
    offset: RareByteOffset,
}

impl Prefilter for RareBytesOne {
    fn next_candidate(
        &self,
        state: &mut PrefilterState,
        haystack: &[u8],
        at: usize,
    ) -> Candidate {
        memchr(self.byte1, &haystack[at..])
            .map(|i| {
                let pos = at + i;
                // Record how far we scanned so this prefilter is not re-run
                // over bytes it has already inspected.
                state.last_scan_at = pos;
                // Shift back by the byte's maximum in-pattern offset, but
                // never before the current search position.
                cmp::max(at, pos.saturating_sub(self.offset.max as usize))
            })
            .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
    }

    fn clone_prefilter(&self) -> Box<dyn Prefilter> {
        Box::new(self.clone())
    }

    fn heap_bytes(&self) -> usize {
        // This prefilter owns no heap memory.
        0
    }
}
|
||||
|
||||
/// A prefilter for scanning for two "rare" bytes.
|
||||
#[derive(Clone, Debug)]
|
||||
struct RareBytesTwo {
|
||||
offsets: RareByteOffsets,
|
||||
byte1: u8,
|
||||
byte2: u8,
|
||||
}
|
||||
|
||||
impl Prefilter for RareBytesTwo {
|
||||
fn next_candidate(
|
||||
&self,
|
||||
state: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Candidate {
|
||||
memchr2(self.byte1, self.byte2, &haystack[at..])
|
||||
.map(|i| {
|
||||
let pos = at + i;
|
||||
state.update_at(pos);
|
||||
let offset = self.offsets.set[haystack[pos] as usize].max;
|
||||
cmp::max(at, pos.saturating_sub(offset as usize))
|
||||
})
|
||||
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
|
||||
}
|
||||
|
||||
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn heap_bytes(&self) -> usize {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
/// A prefilter for scanning for three "rare" bytes.
|
||||
#[derive(Clone, Debug)]
|
||||
struct RareBytesThree {
|
||||
offsets: RareByteOffsets,
|
||||
byte1: u8,
|
||||
byte2: u8,
|
||||
byte3: u8,
|
||||
}
|
||||
|
||||
impl Prefilter for RareBytesThree {
|
||||
fn next_candidate(
|
||||
&self,
|
||||
state: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Candidate {
|
||||
memchr3(self.byte1, self.byte2, self.byte3, &haystack[at..])
|
||||
.map(|i| {
|
||||
let pos = at + i;
|
||||
state.update_at(pos);
|
||||
let offset = self.offsets.set[haystack[pos] as usize].max;
|
||||
cmp::max(at, pos.saturating_sub(offset as usize))
|
||||
})
|
||||
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
|
||||
}
|
||||
|
||||
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn heap_bytes(&self) -> usize {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for constructing a starting byte prefilter.
|
||||
///
|
||||
/// A starting byte prefilter is a simplistic prefilter that looks for possible
|
||||
/// matches by reporting all positions corresponding to a particular byte. This
|
||||
/// generally only takes affect when there are at most 3 distinct possible
|
||||
/// starting bytes. e.g., the patterns `foo`, `bar`, and `baz` have two
|
||||
/// distinct starting bytes (`f` and `b`), and this prefiler returns all
|
||||
/// occurrences of either `f` or `b`.
|
||||
///
|
||||
/// In some cases, a heuristic frequency analysis may determine that it would
|
||||
/// be better not to use this prefilter even when there are 3 or fewer distinct
|
||||
/// starting bytes.
|
||||
#[derive(Clone, Debug)]
|
||||
struct StartBytesBuilder {
|
||||
/// Whether this prefilter should account for ASCII case insensitivity or
|
||||
/// not.
|
||||
ascii_case_insensitive: bool,
|
||||
/// The set of starting bytes observed.
|
||||
byteset: Vec<bool>,
|
||||
/// The number of bytes set to true in `byteset`.
|
||||
count: usize,
|
||||
/// The sum of frequency ranks for the rare bytes detected. This is
|
||||
/// intended to give a heuristic notion of how rare the bytes are.
|
||||
rank_sum: u16,
|
||||
}
|
||||
|
||||
impl StartBytesBuilder {
|
||||
/// Create a new builder for constructing a start byte prefilter.
|
||||
fn new() -> StartBytesBuilder {
|
||||
StartBytesBuilder {
|
||||
ascii_case_insensitive: false,
|
||||
byteset: vec![false; 256],
|
||||
count: 0,
|
||||
rank_sum: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Enable ASCII case insensitivity. When set, byte strings added to this
|
||||
/// builder will be interpreted without respect to ASCII case.
|
||||
fn ascii_case_insensitive(mut self, yes: bool) -> StartBytesBuilder {
|
||||
self.ascii_case_insensitive = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Build the starting bytes prefilter.
|
||||
///
|
||||
/// If there are more than 3 distinct starting bytes, or if heuristics
|
||||
/// otherwise determine that this prefilter should not be used, then `None`
|
||||
/// is returned.
|
||||
fn build(&self) -> Option<PrefilterObj> {
|
||||
if self.count > 3 {
|
||||
return None;
|
||||
}
|
||||
let (mut bytes, mut len) = ([0; 3], 0);
|
||||
for b in 0..256 {
|
||||
if !self.byteset[b] {
|
||||
continue;
|
||||
}
|
||||
// We don't handle non-ASCII bytes for now. Getting non-ASCII
|
||||
// bytes right is trickier, since we generally don't want to put
|
||||
// a leading UTF-8 code unit into a prefilter that isn't ASCII,
|
||||
// since they can frequently. Instead, it would be better to use a
|
||||
// continuation byte, but this requires more sophisticated analysis
|
||||
// of the automaton and a richer prefilter API.
|
||||
if b > 0x7F {
|
||||
return None;
|
||||
}
|
||||
bytes[len] = b as u8;
|
||||
len += 1;
|
||||
}
|
||||
match len {
|
||||
0 => None,
|
||||
1 => Some(PrefilterObj::new(StartBytesOne { byte1: bytes[0] })),
|
||||
2 => Some(PrefilterObj::new(StartBytesTwo {
|
||||
byte1: bytes[0],
|
||||
byte2: bytes[1],
|
||||
})),
|
||||
3 => Some(PrefilterObj::new(StartBytesThree {
|
||||
byte1: bytes[0],
|
||||
byte2: bytes[1],
|
||||
byte3: bytes[2],
|
||||
})),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a byte string to this builder.
|
||||
///
|
||||
/// All patterns added to an Aho-Corasick automaton should be added to this
|
||||
/// builder before attempting to construct the prefilter.
|
||||
fn add(&mut self, bytes: &[u8]) {
|
||||
if self.count > 3 {
|
||||
return;
|
||||
}
|
||||
if let Some(&byte) = bytes.get(0) {
|
||||
self.add_one_byte(byte);
|
||||
if self.ascii_case_insensitive {
|
||||
self.add_one_byte(opposite_ascii_case(byte));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn add_one_byte(&mut self, byte: u8) {
|
||||
if !self.byteset[byte as usize] {
|
||||
self.byteset[byte as usize] = true;
|
||||
self.count += 1;
|
||||
self.rank_sum += freq_rank(byte) as u16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A prefilter for scanning for a single starting byte.
|
||||
#[derive(Clone, Debug)]
|
||||
struct StartBytesOne {
|
||||
byte1: u8,
|
||||
}
|
||||
|
||||
impl Prefilter for StartBytesOne {
|
||||
fn next_candidate(
|
||||
&self,
|
||||
_state: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Candidate {
|
||||
memchr(self.byte1, &haystack[at..])
|
||||
.map(|i| at + i)
|
||||
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
|
||||
}
|
||||
|
||||
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn heap_bytes(&self) -> usize {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
/// A prefilter for scanning for two starting bytes.
|
||||
#[derive(Clone, Debug)]
|
||||
struct StartBytesTwo {
|
||||
byte1: u8,
|
||||
byte2: u8,
|
||||
}
|
||||
|
||||
impl Prefilter for StartBytesTwo {
|
||||
fn next_candidate(
|
||||
&self,
|
||||
_state: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Candidate {
|
||||
memchr2(self.byte1, self.byte2, &haystack[at..])
|
||||
.map(|i| at + i)
|
||||
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
|
||||
}
|
||||
|
||||
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn heap_bytes(&self) -> usize {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
/// A prefilter for scanning for three starting bytes.
|
||||
#[derive(Clone, Debug)]
|
||||
struct StartBytesThree {
|
||||
byte1: u8,
|
||||
byte2: u8,
|
||||
byte3: u8,
|
||||
}
|
||||
|
||||
impl Prefilter for StartBytesThree {
|
||||
fn next_candidate(
|
||||
&self,
|
||||
_state: &mut PrefilterState,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Candidate {
|
||||
memchr3(self.byte1, self.byte2, self.byte3, &haystack[at..])
|
||||
.map(|i| at + i)
|
||||
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
|
||||
}
|
||||
|
||||
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn heap_bytes(&self) -> usize {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the next candidate reported by the given prefilter while
|
||||
/// simultaneously updating the given prestate.
|
||||
///
|
||||
/// The caller is responsible for checking the prestate before deciding whether
|
||||
/// to initiate a search.
|
||||
#[inline]
|
||||
pub fn next<P: Prefilter>(
|
||||
prestate: &mut PrefilterState,
|
||||
prefilter: P,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Candidate {
|
||||
let cand = prefilter.next_candidate(prestate, haystack, at);
|
||||
match cand {
|
||||
Candidate::None => {
|
||||
prestate.update_skipped_bytes(haystack.len() - at);
|
||||
}
|
||||
Candidate::Match(ref m) => {
|
||||
prestate.update_skipped_bytes(m.start() - at);
|
||||
}
|
||||
Candidate::PossibleStartOfMatch(i) => {
|
||||
prestate.update_skipped_bytes(i - at);
|
||||
}
|
||||
}
|
||||
cand
|
||||
}
|
||||
|
||||
/// If the given byte is an ASCII letter, then return it in the opposite case.
|
||||
/// e.g., Given `b'A'`, this returns `b'a'`, and given `b'a'`, this returns
|
||||
/// `b'A'`. If a non-ASCII letter is given, then the given byte is returned.
|
||||
pub fn opposite_ascii_case(b: u8) -> u8 {
|
||||
if b'A' <= b && b <= b'Z' {
|
||||
b.to_ascii_lowercase()
|
||||
} else if b'a' <= b && b <= b'z' {
|
||||
b.to_ascii_uppercase()
|
||||
} else {
|
||||
b
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the frequency rank of the given byte. The higher the rank, the more
|
||||
/// common the byte (heuristically speaking).
|
||||
fn freq_rank(b: u8) -> u8 {
|
||||
use byte_frequencies::BYTE_FREQUENCIES;
|
||||
BYTE_FREQUENCIES[b as usize]
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn scratch() {
|
||||
let mut b = Builder::new(MatchKind::LeftmostFirst);
|
||||
b.add(b"Sherlock");
|
||||
b.add(b"locjaw");
|
||||
// b.add(b"Sherlock");
|
||||
// b.add(b"Holmes");
|
||||
// b.add(b"Watson");
|
||||
// b.add("Шерлок Холмс".as_bytes());
|
||||
// b.add("Джон Уотсон".as_bytes());
|
||||
|
||||
let s = b.build().unwrap();
|
||||
println!("{:?}", s);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,203 @@
|
|||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
|
||||
use error::{Error, Result};
|
||||
|
||||
// NOTE: Most of this code was copied from regex-automata, but without the
|
||||
// (de)serialization specific stuff.
|
||||
|
||||
/// Check that the premultiplication of the given state identifier can
|
||||
/// fit into the representation indicated by `S`. If it cannot, or if it
|
||||
/// overflows `usize` itself, then an error is returned.
|
||||
pub fn premultiply_overflow_error<S: StateID>(
|
||||
last_state: S,
|
||||
alphabet_len: usize,
|
||||
) -> Result<()> {
|
||||
let requested = match last_state.to_usize().checked_mul(alphabet_len) {
|
||||
Some(requested) => requested,
|
||||
None => return Err(Error::premultiply_overflow(0, 0)),
|
||||
};
|
||||
if requested > S::max_id() {
|
||||
return Err(Error::premultiply_overflow(S::max_id(), requested));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert the given `usize` to the chosen state identifier
|
||||
/// representation. If the given value cannot fit in the chosen
|
||||
/// representation, then an error is returned.
|
||||
pub fn usize_to_state_id<S: StateID>(value: usize) -> Result<S> {
|
||||
if value > S::max_id() {
|
||||
Err(Error::state_id_overflow(S::max_id()))
|
||||
} else {
|
||||
Ok(S::from_usize(value))
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the unique identifier for an automaton's fail state in the chosen
|
||||
/// representation indicated by `S`.
|
||||
pub fn fail_id<S: StateID>() -> S {
|
||||
S::from_usize(0)
|
||||
}
|
||||
|
||||
/// Return the unique identifier for an automaton's fail state in the chosen
|
||||
/// representation indicated by `S`.
|
||||
pub fn dead_id<S: StateID>() -> S {
|
||||
S::from_usize(1)
|
||||
}
|
||||
|
||||
mod private {
|
||||
/// Sealed stops crates other than aho-corasick from implementing any
|
||||
/// traits that use it.
|
||||
pub trait Sealed {}
|
||||
impl Sealed for u8 {}
|
||||
impl Sealed for u16 {}
|
||||
impl Sealed for u32 {}
|
||||
impl Sealed for u64 {}
|
||||
impl Sealed for usize {}
|
||||
}
|
||||
|
||||
/// A trait describing the representation of an automaton's state identifier.
|
||||
///
|
||||
/// The purpose of this trait is to safely express both the possible state
|
||||
/// identifier representations that can be used in an automaton and to convert
|
||||
/// between state identifier representations and types that can be used to
|
||||
/// efficiently index memory (such as `usize`).
|
||||
///
|
||||
/// In general, one should not need to implement this trait explicitly. Indeed,
|
||||
/// for now, this trait is sealed such that it cannot be implemented by any
|
||||
/// other type. In particular, this crate provides implementations for `u8`,
|
||||
/// `u16`, `u32`, `u64` and `usize`. (`u32` and `u64` are only provided for
|
||||
/// targets that can represent all corresponding values in a `usize`.)
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// This trait is unsafe because the correctness of its implementations may be
|
||||
/// relied upon by other unsafe code. For example, one possible way to
|
||||
/// implement this trait incorrectly would be to return a maximum identifier
|
||||
/// in `max_id` that is greater than the real maximum identifier. This will
|
||||
/// likely result in wrap-on-overflow semantics in release mode, which can in
|
||||
/// turn produce incorrect state identifiers. Those state identifiers may then
|
||||
/// in turn access out-of-bounds memory in an automaton's search routine, where
|
||||
/// bounds checks are explicitly elided for performance reasons.
|
||||
pub unsafe trait StateID:
|
||||
private::Sealed
|
||||
+ Clone
|
||||
+ Copy
|
||||
+ Debug
|
||||
+ Eq
|
||||
+ Hash
|
||||
+ PartialEq
|
||||
+ PartialOrd
|
||||
+ Ord
|
||||
{
|
||||
/// Convert from a `usize` to this implementation's representation.
|
||||
///
|
||||
/// Implementors may assume that `n <= Self::max_id`. That is, implementors
|
||||
/// do not need to check whether `n` can fit inside this implementation's
|
||||
/// representation.
|
||||
fn from_usize(n: usize) -> Self;
|
||||
|
||||
/// Convert this implementation's representation to a `usize`.
|
||||
///
|
||||
/// Implementors must not return a `usize` value greater than
|
||||
/// `Self::max_id` and must not permit overflow when converting between the
|
||||
/// implementor's representation and `usize`. In general, the preferred
|
||||
/// way for implementors to achieve this is to simply not provide
|
||||
/// implementations of `StateID` that cannot fit into the target platform's
|
||||
/// `usize`.
|
||||
fn to_usize(self) -> usize;
|
||||
|
||||
/// Return the maximum state identifier supported by this representation.
|
||||
///
|
||||
/// Implementors must return a correct bound. Doing otherwise may result
|
||||
/// in memory unsafety.
|
||||
fn max_id() -> usize;
|
||||
}
|
||||
|
||||
unsafe impl StateID for usize {
|
||||
#[inline]
|
||||
fn from_usize(n: usize) -> usize {
|
||||
n
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_usize(self) -> usize {
|
||||
self
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn max_id() -> usize {
|
||||
::std::usize::MAX
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl StateID for u8 {
|
||||
#[inline]
|
||||
fn from_usize(n: usize) -> u8 {
|
||||
n as u8
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn max_id() -> usize {
|
||||
::std::u8::MAX as usize
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl StateID for u16 {
|
||||
#[inline]
|
||||
fn from_usize(n: usize) -> u16 {
|
||||
n as u16
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn max_id() -> usize {
|
||||
::std::u16::MAX as usize
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
|
||||
unsafe impl StateID for u32 {
|
||||
#[inline]
|
||||
fn from_usize(n: usize) -> u32 {
|
||||
n as u32
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn max_id() -> usize {
|
||||
::std::u32::MAX as usize
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
unsafe impl StateID for u64 {
|
||||
#[inline]
|
||||
fn from_usize(n: usize) -> u64 {
|
||||
n as u64
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn max_id() -> usize {
|
||||
::std::u64::MAX as usize
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1 @@
|
|||
{"files":{"CHANGELOG.md":"4d03cdc2987a1fa1b86a2de5fa57714d83cbb9d3d3f400eadecd8e8a0a857621","Cargo.toml":"a9ec8b36707f907971b410719b85e9594cb96c9e4bca6f831e2cc78ba22c71da","LICENSE":"0dd39f89842df915b8ded7ac59e8a1372cf5be36133818866cca2ef3af1a2849","README.md":"132383b73044b1e91acb9e5d69afeb8f14239cfe712aca59152bfe0c420f7a33","examples/test.rs":"4e9e73dfe80573296e93f66c2c03681908c278a758dceb4913ecb65d20e9ed86","src/lib.rs":"7a0e852a4bbfbf72c7702527d7c6f7f8c717fca77bfd4b3e78ba7f6cebed4e6f","src/line.rs":"edbdc54503342733f8aa7a4aa72a7cb08d376d53ca2b85e00a77dd42bf04bb22","src/shapes/mod.rs":"071d6ea4080dc8f1e4299258d65c32bccc40e9eb6933f3b3600576d58e7917ae","src/shapes/rectangle.rs":"ad545b9d4a628b3a515deb9b087f881b253d3f3a16a60734da82896d51c93cc9","src/text/fontconfig.rs":"c673bfcf5df387479dd2027a733d8de85461731b448202f49a9f2d1bce54f465","src/text/mod.rs":"4afd25c6297d55cd5a3956e5ae6d3921403b306533a237fe2e5eab33e65a91ee"},"package":"9b7f09f89872c2b6b29e319377b1fbe91c6f5947df19a25596e121cf19a7b35e"}
|
|
@ -0,0 +1,49 @@
|
|||
"""
|
||||
cargo-raze crate build file.
|
||||
|
||||
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||
"""
|
||||
package(default_visibility = [
|
||||
# Public for visibility by "@raze__crate__version//" targets.
|
||||
#
|
||||
# Prefer access through "//third_party/cargo", which limits external
|
||||
# visibility to explicit Cargo.toml dependencies.
|
||||
"//visibility:public",
|
||||
])
|
||||
|
||||
licenses([
|
||||
"notice", # "MIT"
|
||||
])
|
||||
|
||||
load(
|
||||
"@io_bazel_rules_rust//rust:rust.bzl",
|
||||
"rust_library",
|
||||
"rust_binary",
|
||||
"rust_test",
|
||||
)
|
||||
|
||||
|
||||
|
||||
rust_library(
|
||||
name = "andrew",
|
||||
crate_root = "src/lib.rs",
|
||||
crate_type = "lib",
|
||||
edition = "2015",
|
||||
srcs = glob(["**/*.rs"]),
|
||||
deps = [
|
||||
"//third_party/cargo/vendor/bitflags-1.2.1:bitflags",
|
||||
"//third_party/cargo/vendor/line_drawing-0.7.0:line_drawing",
|
||||
"//third_party/cargo/vendor/walkdir-2.3.1:walkdir",
|
||||
"//third_party/cargo/vendor/xdg-2.2.0:xdg",
|
||||
"//third_party/cargo/vendor/xml-rs-0.8.0:xml_rs",
|
||||
"//third_party/cargo/vendor/rusttype-0.8.2:rusttype",
|
||||
],
|
||||
rustc_flags = [
|
||||
"--cap-lints=allow",
|
||||
],
|
||||
version = "0.2.1",
|
||||
crate_features = [
|
||||
],
|
||||
)
|
||||
|
||||
# Unsupported target "test" with type "example" omitted
|
|
@ -0,0 +1,47 @@
|
|||
# Change Log
|
||||
|
||||
## Unreleased
|
||||
|
||||
## 0.2.1 -- 2019-03-29
|
||||
|
||||
- Fix `get_width()` for texts that start and end with spaces
|
||||
|
||||
## 0.2.0 -- 2019-01-26
|
||||
|
||||
- **[Breaking]** Canvas is now endian aware and will draw to the buffer in the endianness of the `Endian` its created with
|
||||
|
||||
## 0.1.6 -- 2019-01-24
|
||||
|
||||
- Faster drawing of horizontal and verticle lines by precomputing line boundaries
|
||||
- Only calculate alpha overlay when drawing colors without a non-max alpha value for performance
|
||||
|
||||
## 0.1.5 -- 2019-01-13
|
||||
|
||||
- Fix drawing of characters with negative bounding boxes
|
||||
- Fix error in `get_width()` for text without any characters
|
||||
|
||||
## 0.1.4 -- 2018-11-10
|
||||
|
||||
- Remove rusttype version restriction
|
||||
|
||||
## 0.1.3 -- 2018-10-09
|
||||
|
||||
- Move from `quick-xml` to `xml-rs` dependency
|
||||
|
||||
## 0.1.2 -- 2018-10-04
|
||||
|
||||
- Add basic/experimental support for fontconfig in `andrew::text::fontconfig`
|
||||
|
||||
## 0.1.1 -- 2018-09-17
|
||||
|
||||
- Manage dependencies to maintain rust 1.22 compatibility
|
||||
- Update rusttype to 0.7.1
|
||||
|
||||
## 0.1.0 -- 2018-08-17
|
||||
|
||||
Initial version, including:
|
||||
|
||||
- canvas
|
||||
- lines
|
||||
- rectangles
|
||||
- text
|
|
@ -0,0 +1,41 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g. crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
name = "andrew"
|
||||
version = "0.2.1"
|
||||
authors = ["Lucas Timmins <timmins.s.lucas@gmail.com>"]
|
||||
description = "The andrew crate provides convenient drawing of objects such as shapes, lines and text to buffers"
|
||||
readme = "README.md"
|
||||
keywords = ["draw", "buffer", "shapes", "lines", "pixels"]
|
||||
categories = ["rendering", "multimedia::images"]
|
||||
license = "MIT"
|
||||
repository = "https://github.com/trimental/andrew"
|
||||
[dependencies.bitflags]
|
||||
version = "1.0.3"
|
||||
|
||||
[dependencies.line_drawing]
|
||||
version = "0.7.0"
|
||||
|
||||
[dependencies.rusttype]
|
||||
version = "0.7.1"
|
||||
|
||||
[dependencies.walkdir]
|
||||
version = "2.0"
|
||||
|
||||
[dependencies.xdg]
|
||||
version = "2.1.0"
|
||||
|
||||
[dependencies.xml-rs]
|
||||
version = "0.8.0"
|
||||
[dev-dependencies.smithay-client-toolkit]
|
||||
version = "0.4.0"
|
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2018 Lucas Timmins
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,3 @@
|
|||
# Andrew
|
||||
|
||||
This crate provides convenient drawing of objects such as shapes, lines and text to buffers
|
|
@ -0,0 +1,210 @@
|
|||
extern crate andrew;
|
||||
extern crate smithay_client_toolkit as sctk;
|
||||
|
||||
use std::io::{Read, Seek, SeekFrom, Write};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use sctk::utils::{DoubleMemPool, MemPool};
|
||||
use sctk::window::{ConceptFrame, Event as WEvent, Window};
|
||||
use sctk::Environment;
|
||||
|
||||
use sctk::reexports::client::protocol::wl_compositor::RequestsTrait as CompositorRequests;
|
||||
use sctk::reexports::client::protocol::wl_surface::RequestsTrait as SurfaceRequests;
|
||||
use sctk::reexports::client::protocol::{wl_shm, wl_surface};
|
||||
use sctk::reexports::client::{Display, Proxy};
|
||||
|
||||
use andrew::shapes::rectangle;
|
||||
use andrew::text;
|
||||
use andrew::text::fontconfig;
|
||||
|
||||
fn main() {
|
||||
let (display, mut event_queue) =
|
||||
Display::connect_to_env().expect("Failed to connect to the wayland server.");
|
||||
let env = Environment::from_display(&*display, &mut event_queue).unwrap();
|
||||
|
||||
let seat = env
|
||||
.manager
|
||||
.instantiate_auto(|seat| seat.implement(|_, _| {}, ()))
|
||||
.unwrap();
|
||||
|
||||
let mut dimensions = (600, 400);
|
||||
let surface = env
|
||||
.compositor
|
||||
.create_surface(|surface| surface.implement(|_, _| {}, ()))
|
||||
.unwrap();
|
||||
|
||||
let next_action = Arc::new(Mutex::new(None::<WEvent>));
|
||||
|
||||
let waction = next_action.clone();
|
||||
let mut window = Window::<ConceptFrame>::init_from_env(&env, surface, dimensions, move |evt| {
|
||||
let mut next_action = waction.lock().unwrap();
|
||||
// Keep last event in priority order : Close > Configure > Refresh
|
||||
let replace = match (&evt, &*next_action) {
|
||||
(_, &None)
|
||||
| (_, &Some(WEvent::Refresh))
|
||||
| (&WEvent::Configure { .. }, &Some(WEvent::Configure { .. }))
|
||||
| (&WEvent::Close, _) => true,
|
||||
_ => false,
|
||||
};
|
||||
if replace {
|
||||
*next_action = Some(evt);
|
||||
}
|
||||
})
|
||||
.expect("Failed to create a window !");
|
||||
|
||||
window.new_seat(&seat);
|
||||
|
||||
let mut pools = DoubleMemPool::new(&env.shm, || {}).expect("Failed to create a memory pool !");
|
||||
|
||||
let mut font_data = Vec::new();
|
||||
::std::fs::File::open(
|
||||
&fontconfig::FontConfig::new()
|
||||
.unwrap()
|
||||
.get_regular_family_fonts("sans")
|
||||
.unwrap()[0],
|
||||
)
|
||||
.unwrap()
|
||||
.read_to_end(&mut font_data)
|
||||
.unwrap();
|
||||
|
||||
if !env.shell.needs_configure() {
|
||||
if let Some(pool) = pools.pool() {
|
||||
redraw(pool, window.surface(), dimensions, &font_data);
|
||||
}
|
||||
window.refresh();
|
||||
}
|
||||
|
||||
loop {
|
||||
match next_action.lock().unwrap().take() {
|
||||
Some(WEvent::Close) => break,
|
||||
Some(WEvent::Refresh) => {
|
||||
window.refresh();
|
||||
window.surface().commit();
|
||||
}
|
||||
Some(WEvent::Configure { new_size, .. }) => {
|
||||
if let Some((w, h)) = new_size {
|
||||
window.resize(w, h);
|
||||
dimensions = (w, h)
|
||||
}
|
||||
window.refresh();
|
||||
if let Some(pool) = pools.pool() {
|
||||
redraw(pool, window.surface(), dimensions, &font_data);
|
||||
}
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
|
||||
display.flush().unwrap();
|
||||
event_queue.dispatch().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn redraw(
|
||||
pool: &mut MemPool,
|
||||
surface: &Proxy<wl_surface::WlSurface>,
|
||||
dimensions: (u32, u32),
|
||||
font_data: &[u8],
|
||||
) {
|
||||
let (buf_x, buf_y) = (dimensions.0 as usize, dimensions.1 as usize);
|
||||
|
||||
pool.resize(4 * buf_x * buf_y)
|
||||
.expect("Failed to resize the memory pool.");
|
||||
|
||||
let mut buf: Vec<u8> = vec![255; 4 * buf_x * buf_y];
|
||||
let mut canvas =
|
||||
andrew::Canvas::new(&mut buf, buf_x, buf_y, 4 * buf_x, andrew::Endian::native());
|
||||
|
||||
println!("______________");
|
||||
let mut total_dur = Duration::new(0, 0);
|
||||
|
||||
// Draw background
|
||||
let (block_w, block_h) = (buf_x / 20, buf_y / 20);
|
||||
for block_y in 0..21 {
|
||||
for block_x in 0..21 {
|
||||
let color = if (block_x + (block_y % 2)) % 2 == 0 {
|
||||
[255, 0, 0, 0]
|
||||
} else {
|
||||
[255, 255, 255, 255]
|
||||
};
|
||||
|
||||
let block = rectangle::Rectangle::new(
|
||||
(block_w * block_x, block_h * block_y),
|
||||
(block_w, block_h),
|
||||
None,
|
||||
Some(color),
|
||||
);
|
||||
let timer = Instant::now();
|
||||
canvas.draw(&block);
|
||||
total_dur += timer.elapsed()
|
||||
}
|
||||
}
|
||||
println!("Background draw time: {:?}", total_dur);
|
||||
|
||||
let rectangle = rectangle::Rectangle::new(
|
||||
(buf_x / 30, buf_y / 4),
|
||||
(buf_x - (buf_x / 30) * 2, buf_y - buf_y / 2),
|
||||
Some((
|
||||
15,
|
||||
[255, 170, 20, 45],
|
||||
rectangle::Sides::TOP ^ rectangle::Sides::BOTTOM,
|
||||
Some(10),
|
||||
)),
|
||||
Some([255, 170, 20, 45]),
|
||||
);
|
||||
let mut timer = Instant::now();
|
||||
canvas.draw(&rectangle);
|
||||
println!("Rectangle draw time: {:?}", timer.elapsed());
|
||||
total_dur += timer.elapsed();
|
||||
|
||||
let text_h = buf_x as f32 / 80.;
|
||||
let text_hh = text_h / 2.;
|
||||
let mut text = text::Text::new(
|
||||
(63, 69),
|
||||
[255, 255, 255, 255],
|
||||
font_data,
|
||||
text_h,
|
||||
2.0,
|
||||
"“Life is the art of drawing without an eraser.” - John W. Gardner",
|
||||
);
|
||||
text.pos = (
|
||||
buf_x / 2 - text.get_width() / 2,
|
||||
buf_y / 2 - text_hh as usize,
|
||||
);
|
||||
|
||||
let text_box = rectangle::Rectangle::new(
|
||||
(
|
||||
buf_x / 2 - text.get_width() / 2 - 10,
|
||||
buf_y / 2 - text_hh as usize - 10,
|
||||
),
|
||||
(text.get_width() + 20, text_h as usize + 20),
|
||||
Some((3, [255, 255, 255, 255], rectangle::Sides::ALL, Some(5))),
|
||||
None,
|
||||
);
|
||||
|
||||
timer = Instant::now();
|
||||
canvas.draw(&text_box);
|
||||
println!("Text box draw time: {:?}", timer.elapsed());
|
||||
total_dur += timer.elapsed();
|
||||
|
||||
timer = Instant::now();
|
||||
canvas.draw(&text);
|
||||
println!("Text draw time: {:?}", timer.elapsed());
|
||||
total_dur += timer.elapsed();
|
||||
|
||||
println!("Total draw time: {:?}", total_dur);
|
||||
|
||||
pool.seek(SeekFrom::Start(0)).unwrap();
|
||||
pool.write_all(canvas.buffer).unwrap();
|
||||
pool.flush().unwrap();
|
||||
|
||||
let new_buffer = pool.buffer(
|
||||
0,
|
||||
buf_x as i32,
|
||||
buf_y as i32,
|
||||
4 * buf_x as i32,
|
||||
wl_shm::Format::Argb8888,
|
||||
);
|
||||
surface.attach(Some(&new_buffer), 0, 0);
|
||||
surface.commit();
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
//! Andrew is a crate for drawing objects
|
||||
#![warn(missing_docs)]
|
||||
extern crate line_drawing;
|
||||
extern crate rusttype;
|
||||
extern crate walkdir;
|
||||
extern crate xdg;
|
||||
extern crate xml;
|
||||
|
||||
#[macro_use]
|
||||
extern crate bitflags;
|
||||
|
||||
/// A module that contains functions and objects relating to lines
|
||||
pub mod line;
|
||||
/// A module that contains functions and objects relating to shapes
|
||||
pub mod shapes;
|
||||
/// A module that contains functions and objects relating to text
|
||||
pub mod text;
|
||||
|
||||
/// The Drawable trait allows objects to be drawn to a buffer or canvas
pub trait Drawable {
    /// Draws the object onto the given canvas
    fn draw(&self, canvas: &mut Canvas);
}

/// Describes an endianness (aka byte order)
#[derive(Debug, PartialEq)]
pub enum Endian {
    /// Little Endian
    Little,
    /// Big Endian
    Big,
}

impl Endian {
    /// Returns the native endianness of the machine the program runs on
    pub fn native() -> Endian {
        if cfg!(target_endian = "little") {
            Endian::Little
        } else {
            Endian::Big
        }
    }
}

/// The canvas object acts as a wrapper around a buffer, providing information and functions
/// for drawing
pub struct Canvas<'a> {
    /// A buffer for the canvas to draw to
    pub buffer: &'a mut [u8],
    /// The width in pixels of the canvas
    pub width: usize,
    /// The height in pixels of the canvas
    pub height: usize,
    /// The number of bytes between each line of pixels on the canvas
    pub stride: usize,
    /// The number of bytes contained in each pixel (derived as `stride / width`)
    pub pixel_size: usize,
    /// The endianness of the canvas
    pub endianness: Endian,
}

impl<'a> Canvas<'a> {
    /// Creates a new canvas object wrapping `buffer`.
    ///
    /// # Panics
    /// Panics when `stride` is not a multiple of `width`, or when
    /// `buffer.len() != stride * height`.
    pub fn new(
        buffer: &'a mut [u8],
        width: usize,
        height: usize,
        stride: usize,
        endianness: Endian,
    ) -> Canvas<'a> {
        assert!(
            stride % width == 0,
            "Incorrect Dimensions - Stride is not a multiple of width"
        );
        assert!(buffer.len() == stride * height);
        let pixel_size = stride / width;
        Canvas {
            buffer,
            width,
            height,
            stride,
            pixel_size,
            endianness,
        }
    }

    /// Draws an object that implements the Drawable trait to the buffer
    pub fn draw<D: Drawable>(&mut self, drawable: &D) {
        drawable.draw(self);
    }

    /// Draws a pixel at the x and y coordinate.
    ///
    /// `color[0]` is treated as the alpha channel: a value of 255 overwrites
    /// the pixel, any other value alpha-blends the remaining channels over
    /// the existing pixel contents.
    /// NOTE(review): no bounds check is performed here; out-of-range
    /// coordinates panic via slice indexing.
    pub fn draw_point(&mut self, x: usize, y: usize, color: [u8; 4]) {
        let base = self.stride * y + self.pixel_size * x;
        if self.endianness == Endian::Little {
            if color[0] == 255 {
                // Fully opaque: store the channels byte-reversed for little endian.
                self.buffer[base + 3] = color[0];
                self.buffer[base + 2] = color[1];
                self.buffer[base + 1] = color[2];
                self.buffer[base] = color[3];
            } else {
                // Blend each channel toward the new color by the alpha factor.
                for c in 0..3 {
                    let alpha = f32::from(color[0]) / 255.0;
                    let color_diff =
                        (color[3 - c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
                    let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
                    self.buffer[base + c] = new_color;
                }
                // The destination pixel becomes fully opaque after blending.
                self.buffer[base + 3] = 255;
            }
        } else if color[0] == 255 {
            self.buffer[base] = color[0];
            self.buffer[base + 1] = color[1];
            self.buffer[base + 2] = color[2];
            self.buffer[base + 3] = color[3];
        } else {
            for c in 1..4 {
                let alpha = f32::from(color[0]) / 255.0;
                let color_diff =
                    (color[c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
                let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
                self.buffer[base + c] = new_color;
            }
            self.buffer[base] = 255;
        }
    }

    /// Clears the entire canvas buffer by zeroing it.
    ///
    /// Fixed: the previous implementation zeroed `width * height * 4` bytes,
    /// which assumed a 4-byte pixel and no row padding; it panicked or left
    /// bytes uncleared whenever `pixel_size != 4` or `stride != width * 4`.
    pub fn clear(&mut self) {
        for byte in self.buffer.iter_mut() {
            *byte = 0x00;
        }
    }
}
|
|
@ -0,0 +1,84 @@
|
|||
use std::cmp::min;
|
||||
|
||||
use line_drawing::Bresenham;
|
||||
use line_drawing::XiaolinWu;
|
||||
|
||||
use Canvas;
|
||||
use Drawable;
|
||||
|
||||
/// A drawable object that represents a line
pub struct Line {
    /// The first point of the line
    pub pt1: (usize, usize),
    /// The second point of the line
    pub pt2: (usize, usize),
    /// The color of the line
    pub color: [u8; 4],
    /// Decides whether the line will be antialiased
    pub antialiased: bool,
}

impl Line {
    /// Creates a new Line object from its two endpoints, its color and
    /// whether it should be drawn antialiased
    pub fn new(
        pt1: (usize, usize),
        pt2: (usize, usize),
        color: [u8; 4],
        antialiased: bool,
    ) -> Line {
        Line { pt1, pt2, color, antialiased }
    }
}
|
||||
|
||||
impl Drawable for Line {
    /// Rasterizes the line onto the canvas. Axis-aligned lines are drawn as
    /// straight pixel runs; angled lines use Bresenham (aliased) or
    /// Xiaolin Wu (antialiased, via per-pixel coverage in the alpha channel).
    fn draw(&self, canvas: &mut Canvas) {
        if !self.antialiased {
            if self.pt1.0 == self.pt2.0 && self.pt1.0 < canvas.width {
                // Vertical line: a run of points on one column, endpoints in
                // either order.
                let (min_y, max_y) = if self.pt1.1 > self.pt2.1 {
                    (self.pt2.1, self.pt1.1)
                } else {
                    (self.pt1.1, self.pt2.1)
                };
                // Clamp the inclusive end of the run to the last row.
                for y in min_y..min(max_y, canvas.height - 1) + 1 {
                    canvas.draw_point(self.pt1.0, y, self.color)
                }
            } else if self.pt1.1 == self.pt2.1 && self.pt1.1 < canvas.height {
                // Horizontal line: a run of points on one row.
                let (min_x, max_x) = if self.pt1.0 > self.pt2.0 {
                    (self.pt2.0, self.pt1.0)
                } else {
                    (self.pt1.0, self.pt2.0)
                };
                // Clamp the inclusive end of the run to the last column.
                for x in min_x..min(max_x, canvas.width - 1) + 1 {
                    canvas.draw_point(x, self.pt1.1, self.color)
                }
            } else {
                // Angled line without antialias
                // NOTE(review): only the upper bounds are checked here; the
                // usize-to-isize casts keep coordinates non-negative, so the
                // lower bound is implicit.
                for (x, y) in Bresenham::new(
                    (self.pt1.0 as isize, self.pt1.1 as isize),
                    (self.pt2.0 as isize, self.pt2.1 as isize),
                ) {
                    if x < canvas.width as isize && y < canvas.height as isize {
                        canvas.draw_point(x as usize, y as usize, self.color)
                    }
                }
            }
        } else {
            // Angled line with antialias: Xiaolin Wu yields a coverage factor
            // per pixel, which is folded into the color's alpha channel.
            for ((x, y), coverage) in XiaolinWu::<f32, isize>::new(
                (self.pt1.0 as f32, self.pt1.1 as f32),
                (self.pt2.0 as f32, self.pt2.1 as f32),
            ) {
                if x < canvas.width as isize && y < canvas.height as isize {
                    let mut color = self.color;
                    color[3] = (f32::from(color[3]) * coverage) as u8;
                    canvas.draw_point(x as usize, y as usize, color)
                }
            }
        }
    }
}
|
|
@ -0,0 +1,2 @@
|
|||
/// A module that contains functions and objects relating to rectangles
|
||||
pub mod rectangle;
|
|
@ -0,0 +1,153 @@
|
|||
use line::Line;
|
||||
use Canvas;
|
||||
use Drawable;
|
||||
|
||||
bitflags! {
    /// The Sides bitflag represents the sides of a rectangle; flags can be
    /// combined with `|` to select several sides at once.
    pub struct Sides: u32 {
        /// The top side of the rectangle
        const TOP = 0b0001;
        /// The bottom side of the rectangle
        const BOTTOM = 0b0010;
        /// The left side of the rectangle
        const LEFT = 0b0100;
        /// The right side of the rectangle
        const RIGHT = 0b1000;
        /// All sides of the rectangle
        const ALL = Self::TOP.bits | Self::BOTTOM.bits | Self::LEFT.bits | Self::RIGHT.bits;
    }
}
|
||||
|
||||
/// A drawable object that represents a rectangle
pub struct Rectangle {
    /// Position of the top-left corner of rectangle
    pub pos: (usize, usize),
    /// The size of the rectangle to be drawn, the border will be contained within this size
    pub size: (usize, usize),
    /// The border that is drawn around the perimeter of the rectangle. Its arguments are
    /// thickness of border, color of border, sides that the border is drawn around, rounding size
    /// of the corners
    pub border: Option<(usize, [u8; 4], Sides, Option<usize>)>,
    /// The color of the fill (area) of the rectangle
    pub fill: Option<[u8; 4]>,
}
|
||||
|
||||
impl Rectangle {
    /// Creates a new Rectangle object from its top-left position, its size,
    /// an optional border description and an optional fill color
    pub fn new(
        pos: (usize, usize),
        size: (usize, usize),
        border: Option<(usize, [u8; 4], Sides, Option<usize>)>,
        fill: Option<[u8; 4]>,
    ) -> Rectangle {
        Rectangle {
            pos,
            size,
            border,
            fill,
        }
    }

    /// Draws the configured border, one 1-pixel ring per iteration, from the
    /// outermost ring (i == 0) inward to the border thickness.
    fn draw_borders(&self, canvas: &mut Canvas) {
        if let Some(border) = self.border {
            for i in 0..border.0 {
                // Horizontal inset of this ring's lines, used to approximate
                // rounded corners: for the outer `round_size` rings the line
                // is shortened by a circle-equation offset, deeper rings are
                // not inset at all.
                let rounding_space = if let Some(round_size) = border.3 {
                    if i < round_size {
                        round_size
                            - ((round_size as f32).powi(2) - ((round_size - i - 1) as f32).powi(2))
                                .sqrt()
                                .round() as usize
                    } else {
                        0
                    }
                } else {
                    0
                };

                // Top line
                if border.2.contains(Sides::TOP) && canvas.width > rounding_space * 2 {
                    Line::new(
                        (self.pos.0 + rounding_space, self.pos.1 + i),
                        (self.pos.0 + self.size.0 - rounding_space, self.pos.1 + i),
                        border.1,
                        false,
                    )
                    .draw(canvas);
                }
                // Bottom line
                if border.2.contains(Sides::BOTTOM) && canvas.width > rounding_space * 2 {
                    Line::new(
                        (self.pos.0 + rounding_space, self.pos.1 + self.size.1 - i),
                        (
                            self.pos.0 + self.size.0 - rounding_space,
                            self.pos.1 + self.size.1 - i,
                        ),
                        border.1,
                        false,
                    )
                    .draw(canvas);
                }
                // Left line
                if border.2.contains(Sides::LEFT) && canvas.height > rounding_space * 2 {
                    Line::new(
                        (self.pos.0 + i, self.pos.1 + rounding_space),
                        (self.pos.0 + i, self.pos.1 + self.size.1 - rounding_space),
                        border.1,
                        false,
                    )
                    .draw(canvas);
                }
                // Right line
                if border.2.contains(Sides::RIGHT) && canvas.height > rounding_space * 2 {
                    Line::new(
                        (self.pos.0 + self.size.0 - i, self.pos.1 + rounding_space),
                        (
                            self.pos.0 + self.size.0 - i,
                            self.pos.1 + self.size.1 - rounding_space,
                        ),
                        border.1,
                        false,
                    )
                    .draw(canvas);
                }
            }
        }
    }

    /// Fills the interior area (the rectangle minus any bordered sides) with
    /// the fill color, one horizontal line per row.
    fn draw_area(&self, canvas: &mut Canvas) {
        if let Some(fill) = self.fill {
            let (area_pos, area_size) = self.measure_area();
            for y in area_pos.1..area_pos.1 + area_size.1 + 1 {
                Line::new((area_pos.0, y), (area_pos.0 + area_size.0, y), fill, false).draw(canvas)
            }
        }
    }

    /// Computes the position and size of the fill area by shrinking the
    /// rectangle inward on each side that has a border.
    /// NOTE(review): these subtractions underflow (panic in debug builds) if
    /// the border thickness exceeds the rectangle size — TODO confirm callers
    /// guarantee `border.0 <= size`.
    fn measure_area(&self) -> ((usize, usize), (usize, usize)) {
        let (mut area_pos, mut area_size) = (self.pos, self.size);
        if let Some(border) = self.border {
            if border.2.contains(Sides::TOP) {
                area_pos.1 += border.0;
                area_size.1 -= border.0;
            }
            if border.2.contains(Sides::BOTTOM) {
                area_size.1 -= border.0;
            }
            if border.2.contains(Sides::LEFT) {
                area_pos.0 += border.0;
                area_size.0 -= border.0;
            }
            if border.2.contains(Sides::RIGHT) {
                area_size.0 -= border.0;
            }
        }
        (area_pos, area_size)
    }
}
|
||||
|
||||
impl Drawable for Rectangle {
|
||||
fn draw(&self, canvas: &mut Canvas) {
|
||||
self.draw_borders(canvas);
|
||||
self.draw_area(canvas);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,162 @@
|
|||
use std::fs::File;
|
||||
use std::io::prelude::*;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use xdg::BaseDirectories;
|
||||
|
||||
use xml::reader::{EventReader, XmlEvent};
|
||||
|
||||
use walkdir::WalkDir;
|
||||
|
||||
/// Locates fontconfig config
|
||||
fn get_config() -> Option<PathBuf> {
|
||||
let xdg_dirs = BaseDirectories::with_prefix("fontconfig").unwrap();
|
||||
xdg_dirs.find_config_file("fonts.conf").or_else(|| {
|
||||
let config = Path::new("/etc/fonts/fonts.conf");
|
||||
if config.exists() {
|
||||
Some(config.into())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_config(path: &Path) -> Vec<(Vec<String>, String)> {
|
||||
let config_file = File::open(path).unwrap();
|
||||
let parser = EventReader::new(config_file);
|
||||
let mut tracking_tags: Vec<String> = Vec::new();
|
||||
let mut xml_data: Vec<(Vec<String>, String)> = Vec::new();
|
||||
|
||||
for e in parser {
|
||||
match e {
|
||||
Ok(XmlEvent::StartElement { name, .. }) => {
|
||||
tracking_tags.push(name.to_string());
|
||||
}
|
||||
Ok(XmlEvent::CData(data)) => {
|
||||
xml_data.push((tracking_tags.clone(), data));
|
||||
}
|
||||
Ok(XmlEvent::Characters(data)) => {
|
||||
xml_data.push((tracking_tags.clone(), data));
|
||||
}
|
||||
Ok(XmlEvent::EndElement { .. }) => {
|
||||
tracking_tags.pop();
|
||||
}
|
||||
Err(e) => panic!(e),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
xml_data
|
||||
}
|
||||
|
||||
/// Represents the main fontconfig config file
pub struct FontConfig {
    // Path of the config file that was parsed.
    location: PathBuf,
    // Flat list of (tag-path, text) pairs extracted from the XML config.
    data: Vec<(Vec<String>, String)>,
}
|
||||
|
||||
impl FontConfig {
|
||||
/// Creates a new FontConfig object by looking for the fontconfig config file
|
||||
pub fn new() -> Result<FontConfig, ()> {
|
||||
let location = get_config().ok_or(())?;
|
||||
let data = parse_config(&location);
|
||||
Ok(FontConfig {
|
||||
location: location.to_path_buf(),
|
||||
data,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the location of the fontconfig config file being used
|
||||
pub fn get_location(&self) -> &Path {
|
||||
&self.location
|
||||
}
|
||||
|
||||
/// Get the directories that contain fonts
|
||||
pub fn get_font_dirs(&self) -> Vec<PathBuf> {
|
||||
let mut dirs = Vec::new();
|
||||
for entry in &self.data {
|
||||
if entry.0.last() == Some(&"dir".to_string()) {
|
||||
let path = PathBuf::from(entry.1.clone());
|
||||
if path.exists() {
|
||||
dirs.push(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
dirs
|
||||
}
|
||||
|
||||
/// Return all fonts installed on the system
|
||||
pub fn get_fonts(&self) -> Result<Vec<PathBuf>, ::std::io::Error> {
|
||||
let mut fonts = Vec::new();
|
||||
for dir in self.get_font_dirs() {
|
||||
for file in WalkDir::new(dir)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|p| p.file_type().is_file())
|
||||
{
|
||||
let path = file.into_path();
|
||||
if let Some(extension) = path.extension() {
|
||||
match extension.to_str() {
|
||||
Some("ttf") | Some("otf") => fonts.push(path.clone()),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(fonts)
|
||||
}
|
||||
|
||||
/// Return all 'fonts.dir' files in font directories
|
||||
pub fn get_font_dir_files(&self) -> Result<Vec<PathBuf>, ::std::io::Error> {
|
||||
let mut fonts = Vec::new();
|
||||
for dir in self.get_font_dirs() {
|
||||
for file in WalkDir::new(dir)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|p| p.file_type().is_file())
|
||||
{
|
||||
let path = file.into_path();
|
||||
if let Some(file_name) = path.clone().file_name() {
|
||||
if file_name.to_str() == Some("fonts.dir") {
|
||||
fonts.push(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(fonts)
|
||||
}
|
||||
|
||||
/// Returns the paths of regular fonts belonging to a specific family installed on the system
|
||||
pub fn get_regular_family_fonts(&self, family: &str) -> Result<Vec<PathBuf>, ::std::io::Error> {
|
||||
let fonts_dir_files = self.get_font_dir_files()?;
|
||||
let mut fonts: Vec<PathBuf> = Vec::new();
|
||||
for dir in fonts_dir_files {
|
||||
let mut file = ::std::fs::File::open(dir.clone()).unwrap();
|
||||
let mut buf = String::new();
|
||||
file.read_to_string(&mut buf)?;
|
||||
|
||||
for line in buf.lines().filter(|l| l.find("medium-r-normal").is_some()) {
|
||||
if let Some(split) = line.find(' ') {
|
||||
let name = line[..split].to_string();
|
||||
let settings = line[split..].to_string();
|
||||
let mut char_buf = String::new();
|
||||
for c in settings.chars() {
|
||||
if c == ' ' || c == '-' {
|
||||
char_buf.clear()
|
||||
} else {
|
||||
char_buf.push(c);
|
||||
if char_buf == family {
|
||||
let path = dir.with_file_name(name);
|
||||
if !fonts.contains(&path) {
|
||||
fonts.push(path);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(fonts)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,121 @@
|
|||
/// A module that contains functions and objects relating to fontconfig
|
||||
pub mod fontconfig;
|
||||
|
||||
use rusttype::{point, Font, Scale, SharedBytes, VMetrics};
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
use std::path::PathBuf;
|
||||
use Canvas;
|
||||
use Drawable;
|
||||
|
||||
/// A drawable object that represents text
pub struct Text<'a> {
    /// The position of the text on the canvas
    pub pos: (usize, usize),
    /// The color of the text
    pub color: [u8; 4],
    /// The text that is rendered to the canvas on draw
    pub text: String,
    /// The font used in rendering the text
    pub font: Font<'a>,
    /// The scale that is applied to the text
    pub scale: Scale,
    /// The vertical metrics of the text at the given scale
    pub v_metrics: VMetrics,
}
|
||||
|
||||
/// Loads a font file into a `Vec<u8>`.
///
/// # Panics
/// Panics when the file cannot be opened or read.
pub fn load_font_file<P: Into<PathBuf>>(path: P) -> Vec<u8> {
    let mut file = File::open(path.into()).expect("Could not open font file");
    let mut data = Vec::new();
    file.read_to_end(&mut data).expect("Could not read font file");
    data
}
|
||||
|
||||
impl<'a> Text<'a> {
|
||||
/// Creates a new Text object
|
||||
pub fn new<P: Into<SharedBytes<'a>>, T: Into<String>>(
|
||||
pos: (usize, usize),
|
||||
color: [u8; 4],
|
||||
font_data: P,
|
||||
height: f32,
|
||||
width_scale: f32,
|
||||
text: T,
|
||||
) -> Text<'a> {
|
||||
let text = text.into();
|
||||
// Create font
|
||||
let font = Font::from_bytes(font_data).expect("Error constructing Font");
|
||||
// Create scale
|
||||
let scale = Scale {
|
||||
x: height * width_scale,
|
||||
y: height,
|
||||
};
|
||||
// Create needed metrics
|
||||
let v_metrics = font.v_metrics(scale);
|
||||
Text {
|
||||
pos,
|
||||
color,
|
||||
text: text.clone(),
|
||||
scale,
|
||||
v_metrics,
|
||||
font,
|
||||
}
|
||||
}
|
||||
|
||||
fn draw_text(&self, canvas: &mut Canvas) {
|
||||
let glyphs: Vec<_> = self
|
||||
.font
|
||||
.layout(&self.text, self.scale, point(0.0, self.v_metrics.ascent))
|
||||
.collect();
|
||||
for glyph in glyphs {
|
||||
if let Some(bounding_box) = glyph.pixel_bounding_box() {
|
||||
glyph.draw(|x, y, v| {
|
||||
let x = ((x as usize + self.pos.0) as i32 + bounding_box.min.x) as usize;
|
||||
let y = ((y as usize + self.pos.1) as i32 + bounding_box.min.y) as usize;
|
||||
|
||||
if x < canvas.width && y < canvas.height {
|
||||
let mut color = self.color;
|
||||
color[0] = (f32::from(color[0]) * v) as u8;
|
||||
canvas.draw_point(x, y, color);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculates the width in pixels of the text
|
||||
pub fn get_width(&self) -> usize {
|
||||
let glyphs: Vec<_> = self
|
||||
.font
|
||||
.layout(&self.text, self.scale, point(0.0, self.v_metrics.ascent))
|
||||
.collect();
|
||||
let min_x = glyphs
|
||||
.first()
|
||||
.map(|g| {
|
||||
if let Some(bb) = g.pixel_bounding_box() {
|
||||
bb.min.x
|
||||
} else {
|
||||
g.position().x as i32
|
||||
}
|
||||
})
|
||||
.unwrap_or(0);
|
||||
let max_x = glyphs
|
||||
.last()
|
||||
.map(|g| {
|
||||
if let Some(bb) = g.pixel_bounding_box() {
|
||||
bb.max.x
|
||||
} else {
|
||||
(g.position().x + g.unpositioned().h_metrics().advance_width) as i32
|
||||
}
|
||||
})
|
||||
.unwrap_or(0);
|
||||
(max_x - min_x) as usize
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Drawable for Text<'a> {
|
||||
fn draw(&self, canvas: &mut Canvas) {
|
||||
self.draw_text(canvas);
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
{"files":{"Cargo.toml":"dbe01d8dfe2ea291ca94d56acfe3a401bbaf81d960be2d2afc5e916f755a9ab7","src/lib.rs":"d364185fd66b549a70b935fdfec041b55a10cdc0dd901fd95b38554f08cf0923"},"package":"000444226fcff248f2bc4c7625be32c63caccfecc2723a2b9f78a7487a49c407"}
|
|
@ -0,0 +1,42 @@
|
|||
"""
|
||||
cargo-raze crate build file.
|
||||
|
||||
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||
"""
|
||||
package(default_visibility = [
|
||||
# Public for visibility by "@raze__crate__version//" targets.
|
||||
#
|
||||
# Prefer access through "//third_party/cargo", which limits external
|
||||
# visibility to explicit Cargo.toml dependencies.
|
||||
"//visibility:public",
|
||||
])
|
||||
|
||||
licenses([
|
||||
"notice", # "MIT"
|
||||
])
|
||||
|
||||
load(
|
||||
"@io_bazel_rules_rust//rust:rust.bzl",
|
||||
"rust_library",
|
||||
"rust_binary",
|
||||
"rust_test",
|
||||
)
|
||||
|
||||
|
||||
|
||||
rust_library(
|
||||
name = "android_glue",
|
||||
crate_root = "src/lib.rs",
|
||||
crate_type = "lib",
|
||||
edition = "2015",
|
||||
srcs = glob(["**/*.rs"]),
|
||||
deps = [
|
||||
],
|
||||
rustc_flags = [
|
||||
"--cap-lints=allow",
|
||||
],
|
||||
version = "0.2.3",
|
||||
crate_features = [
|
||||
],
|
||||
)
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
[package]
|
||||
|
||||
name = "android_glue"
|
||||
version = "0.2.3"
|
||||
authors = ["Pierre Krieger <pierre.krieger1708@gmail.com>"]
|
||||
license = "MIT"
|
||||
description = "Glue for the Android JNI"
|
||||
repository = "https://github.com/tomaka/android-rs-glue"
|
|
@ -0,0 +1,152 @@
|
|||
#![cfg(target_os = "android")]
|
||||
|
||||
extern {
|
||||
fn cargo_apk_injected_glue_get_native_window() -> *const c_void;
|
||||
fn cargo_apk_injected_glue_add_sender(sender: *mut ());
|
||||
fn cargo_apk_injected_glue_add_sender_missing(sender: *mut ());
|
||||
fn cargo_apk_injected_glue_add_sync_event_handler(sender: *mut ());
|
||||
fn cargo_apk_injected_glue_remove_sync_event_handler(sender: *mut ());
|
||||
fn cargo_apk_injected_glue_set_multitouch(multitouch: bool);
|
||||
fn cargo_apk_injected_glue_write_log(ptr: *const (), len: usize);
|
||||
fn cargo_apk_injected_glue_load_asset(ptr: *const (), len: usize) -> *mut c_void;
|
||||
fn cargo_apk_injected_glue_wake_event_loop();
|
||||
}
|
||||
|
||||
use std::mem;
|
||||
use std::os::raw::c_void;
|
||||
use std::sync::mpsc::Sender;
|
||||
|
||||
/// An event triggered by the Android environment.
#[derive(Clone, Copy, Debug)]
pub enum Event {
    /// A pointer motion event with its data.
    EventMotion(Motion),
    /// A key was released.
    EventKeyUp,
    /// A key was pressed.
    EventKeyDown,
    // Window/activity lifecycle notifications forwarded from the glue.
    InitWindow,
    SaveState,
    TermWindow,
    GainedFocus,
    LostFocus,
    InputChanged,
    WindowResized,
    WindowRedrawNeeded,
    ContentRectChanged,
    ConfigChanged,
    LowMemory,
    Start,
    Resume,
    Pause,
    Stop,
    Destroy,
    /// Emitted when `wake_event_loop()` wakes the poll loop.
    Wake
}

/// Data about a motion event.
#[derive(Clone, Copy, Debug)]
pub struct Motion {
    /// What the pointer did (down/move/up/cancel).
    pub action: MotionAction,
    /// Identifier of the pointer (for multitouch).
    pub pointer_id: i32,
    /// Horizontal position, in the coordinate space supplied by the glue.
    pub x: f32,
    /// Vertical position, in the coordinate space supplied by the glue.
    pub y: f32,
}

/// The type of pointer action in a motion event.
#[derive(Clone, Copy, Debug)]
pub enum MotionAction {
    Down,
    Move,
    Up,
    Cancel,
}

/// Errors returned by `load_asset`.
pub enum AssetError {
    /// The named asset does not exist in the APK.
    AssetMissing,
    /// The asset exists but produced no data.
    EmptyBuffer,
}

// Trait used to dispatch sync events from the polling loop thread.
pub trait SyncEventHandler {
    /// Called synchronously on the polling thread for each event.
    fn handle(&mut self, event: &Event);
}
|
||||
|
||||
/// Adds a sender where events will be sent to.
///
/// The sender is boxed and its ownership is transferred to the injected glue
/// via the raw pointer; the glue is responsible for freeing it.
#[inline]
pub fn add_sender(sender: Sender<Event>) {
    unsafe {
        let sender = Box::into_raw(Box::new(sender)) as *mut _;
        cargo_apk_injected_glue_add_sender(sender);
    }
}

/// Adds a SyncEventHandler which will receive sync events from the polling loop.
///
/// The handler box is itself boxed (the glue receives a `*mut Box<SyncEventHandler>`,
/// a thin pointer to the fat trait-object pointer).
#[inline]
pub fn add_sync_event_handler(handler: Box<SyncEventHandler>) {
    unsafe {
        let handler = Box::into_raw(Box::new(handler)) as *mut _;
        cargo_apk_injected_glue_add_sync_event_handler(handler);
    }
}

/// Removes a SyncEventHandler.
///
/// NOTE(review): this boxes the *pointer* before passing it across the FFI
/// boundary, so the glue receives a freshly allocated `Box<*const SyncEventHandler>`
/// rather than the pointer itself — presumably the injected glue expects this
/// shape and frees the allocation; verify against the glue implementation.
#[inline]
pub fn remove_sync_event_handler(handler: *const SyncEventHandler) {
    unsafe {
        let handler = Box::into_raw(Box::new(handler)) as *mut _;
        cargo_apk_injected_glue_remove_sync_event_handler(handler);
    }
}

/// Enables or disables multitouch event reporting in the injected glue.
#[inline]
pub fn set_multitouch(multitouch: bool) {
    unsafe {
        cargo_apk_injected_glue_set_multitouch(multitouch);
    }
}

/// Adds a sender where events will be sent to, but also sends
/// any missing events to the sender object.
///
/// The missing events happen when the application starts, but before
/// any senders are registered. Since these might be important to certain
/// applications, this function provides that support.
#[inline]
pub fn add_sender_missing(sender: Sender<Event>) {
    unsafe {
        let sender = Box::into_raw(Box::new(sender)) as *mut _;
        cargo_apk_injected_glue_add_sender_missing(sender);
    }
}

/// Returns a handle to the native window.
#[inline]
pub unsafe fn get_native_window() -> *const c_void {
    cargo_apk_injected_glue_get_native_window()
}

/// Writes a message to the Android log through the injected glue.
///
/// NOTE(review): relies on `mem::transmute` turning a `&str` into a
/// `(pointer, length)` pair, i.e. on the unspecified layout of fat pointers —
/// works with current rustc but is not a guaranteed layout.
#[inline]
pub fn write_log(message: &str) {
    unsafe {
        let (message_ptr, message_len) = mem::transmute(message);
        cargo_apk_injected_glue_write_log(message_ptr, message_len);
    }
}

/// Loads an asset from the APK by name.
///
/// The glue returns a heap-allocated `Result` which is reclaimed here with
/// `Box::from_raw` (same fat-pointer transmute caveat as `write_log`).
#[inline]
pub fn load_asset(filename: &str) -> Result<Vec<u8>, AssetError> {
    unsafe {
        let (filename_ptr, filename_len) = mem::transmute(filename);
        let data = cargo_apk_injected_glue_load_asset(filename_ptr, filename_len);
        let data: Box<Result<Vec<u8>, AssetError>> = Box::from_raw(data as *mut _);
        *data
    }
}

// Wakes the event poll asynchronously and sends a Event::Wake event to the senders.
// This method can be called on any thread. This method returns immediately.
#[inline]
pub fn wake_event_loop() {
    unsafe {
        cargo_apk_injected_glue_wake_event_loop();
    }
}
|
|
@ -0,0 +1 @@
|
|||
{"files":{"Cargo.toml":"bcd8107c8c85e92fb0b291014b8ba62bb1f076ec297028fd44e264765c303f2f","LICENSE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","README.md":"5e965438ec90b7ede0a5c93c482a91536759c147e215f7471d8534f121feb244","src/abs_diff_eq.rs":"b11c8128d1460ee9739abeedc49d8923fde2acb953481661f5adc94feef86761","src/lib.rs":"075aa97495af343ad128b59cdec0206eb06d30d153abe1c38226d9d0f55c0fc9","src/macros.rs":"a9b59cce0cdb3e154321e7ba6ed2d53c21eeb8a46840a761e36e0259b44a59c4","src/relative_eq.rs":"ff292f125853ff846adc29cfa260042bab5e329e7b47b732e863587b8946b9d2","src/ulps_eq.rs":"21ad336b9ed8762eb3b7b1dd1b47b001edc8497d85c8c385bf8b2964be5687e7","tests/abs_diff_eq.rs":"9df9b48842dbc61b04b0328b64f2ce48b6e66e9538178babd58a9e52adeaddaf","tests/macro_import.rs":"006d813c99217d251a30b2f248548e7ad385754f88f6d9838d1dee866ea4b7f4","tests/macros.rs":"697241db4db66633cb53324bb127c64fd3b544236055bfe1721abb64a1dfab23","tests/relative_eq.rs":"c114edb6af07a2ac126e167682dd5d677d5591217f48bfdba150f866dfe4fdaf","tests/ulps_eq.rs":"ef6d57b98394fc87e724e26de7a3461426444563ec962661f660875b0702aeb6"},"package":"f0e60b75072ecd4168020818c0107f2857bb6c4e64252d8d3983f6263b40a5c3"}
|
|
@ -0,0 +1,48 @@
|
|||
"""
|
||||
cargo-raze crate build file.
|
||||
|
||||
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||
"""
|
||||
package(default_visibility = [
|
||||
# Public for visibility by "@raze__crate__version//" targets.
|
||||
#
|
||||
# Prefer access through "//third_party/cargo", which limits external
|
||||
# visibility to explicit Cargo.toml dependencies.
|
||||
"//visibility:public",
|
||||
])
|
||||
|
||||
licenses([
|
||||
"notice", # "Apache-2.0"
|
||||
])
|
||||
|
||||
load(
|
||||
"@io_bazel_rules_rust//rust:rust.bzl",
|
||||
"rust_library",
|
||||
"rust_binary",
|
||||
"rust_test",
|
||||
)
|
||||
|
||||
|
||||
# Unsupported target "abs_diff_eq" with type "test" omitted
|
||||
|
||||
rust_library(
|
||||
name = "approx",
|
||||
crate_root = "src/lib.rs",
|
||||
crate_type = "lib",
|
||||
edition = "2015",
|
||||
srcs = glob(["**/*.rs"]),
|
||||
deps = [
|
||||
"//third_party/cargo/vendor/num-traits-0.2.11:num_traits",
|
||||
],
|
||||
rustc_flags = [
|
||||
"--cap-lints=allow",
|
||||
],
|
||||
version = "0.3.2",
|
||||
crate_features = [
|
||||
],
|
||||
)
|
||||
|
||||
# Unsupported target "macro_import" with type "test" omitted
|
||||
# Unsupported target "macros" with type "test" omitted
|
||||
# Unsupported target "relative_eq" with type "test" omitted
|
||||
# Unsupported target "ulps_eq" with type "test" omitted
|
|
@ -0,0 +1,39 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
name = "approx"
|
||||
version = "0.3.2"
|
||||
authors = ["Brendan Zabarauskas <bjzaba@yahoo.com.au>"]
|
||||
description = "Approximate floating point equality comparisons and assertions."
|
||||
homepage = "https://github.com/brendanzab/approx"
|
||||
documentation = "https://docs.rs/approx"
|
||||
readme = "README.md"
|
||||
keywords = ["approximate", "assert", "comparison", "equality", "float"]
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/brendanzab/approx"
|
||||
[package.metadata.docs.rs]
|
||||
features = ["std", "num-complex"]
|
||||
|
||||
[lib]
|
||||
name = "approx"
|
||||
[dependencies.num-complex]
|
||||
version = "0.2.0"
|
||||
optional = true
|
||||
|
||||
[dependencies.num-traits]
|
||||
version = "0.2.0"
|
||||
default_features = false
|
||||
|
||||
[features]
|
||||
default = ["std"]
|
||||
std = []
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,20 @@
|
|||
# approx
|
||||
|
||||
[![Build Status][travis_badge]][travis_url]
|
||||
[![Version][version_badge]][crate_url]
|
||||
[![Documentation][docs_badge]][docs_url]
|
||||
[![Downloads][downloads_badge]][crate_url]
|
||||
[![License][license_badge]][license_url]
|
||||
|
||||
[travis_badge]: https://travis-ci.org/brendanzab/approx.svg?branch=master
|
||||
[docs_badge]: https://docs.rs/approx/badge.svg
|
||||
[version_badge]: https://img.shields.io/crates/v/approx.svg
|
||||
[license_badge]: https://img.shields.io/crates/l/approx.svg
|
||||
[downloads_badge]: https://img.shields.io/crates/d/approx.svg
|
||||
|
||||
[travis_url]: https://travis-ci.org/brendanzab/approx
|
||||
[docs_url]: https://docs.rs/approx
|
||||
[crate_url]: https://crates.io/crates/approx
|
||||
[license_url]: https://github.com/brendanzab/approx/blob/master/LICENSE
|
||||
|
||||
Approximate floating point equality comparisons and assertions for the Rust Programming Language.
|
|
@ -0,0 +1,185 @@
|
|||
#[cfg(feature = "num-complex")]
|
||||
use num_complex::Complex;
|
||||
#[cfg(not(feature = "std"))]
|
||||
use num_traits::float::FloatCore;
|
||||
use std::{cell, f32, f64};
|
||||
|
||||
/// Equality that is defined using the absolute difference of two numbers.
|
||||
pub trait AbsDiffEq<Rhs = Self>: PartialEq<Rhs>
|
||||
where
|
||||
Rhs: ?Sized,
|
||||
{
|
||||
/// Used for specifying relative comparisons.
|
||||
type Epsilon;
|
||||
|
||||
/// The default tolerance to use when testing values that are close together.
|
||||
///
|
||||
/// This is used when no `epsilon` value is supplied to the `abs_diff_eq!`, `relative_eq!`, or
|
||||
/// `ulps_eq!` macros.
|
||||
fn default_epsilon() -> Self::Epsilon;
|
||||
|
||||
/// A test for equality that uses the absolute difference to compute the approximate
|
||||
/// equality of two numbers.
|
||||
fn abs_diff_eq(&self, other: &Rhs, epsilon: Self::Epsilon) -> bool;
|
||||
|
||||
/// The inverse of `ApproxEq::abs_diff_eq`.
|
||||
fn abs_diff_ne(&self, other: &Rhs, epsilon: Self::Epsilon) -> bool {
|
||||
!Self::abs_diff_eq(self, other, epsilon)
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Base implementations
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
macro_rules! impl_unsigned_abs_diff_eq {
|
||||
($T:ident, $default_epsilon:expr) => {
|
||||
impl AbsDiffEq for $T {
|
||||
type Epsilon = $T;
|
||||
|
||||
#[inline]
|
||||
fn default_epsilon() -> $T {
|
||||
$default_epsilon
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abs_diff_eq(&self, other: &$T, epsilon: $T) -> bool {
|
||||
(if self > other {
|
||||
self - other
|
||||
} else {
|
||||
other - self
|
||||
}) <= epsilon
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_unsigned_abs_diff_eq!(u8, 0);
|
||||
impl_unsigned_abs_diff_eq!(u16, 0);
|
||||
impl_unsigned_abs_diff_eq!(u32, 0);
|
||||
impl_unsigned_abs_diff_eq!(u64, 0);
|
||||
impl_unsigned_abs_diff_eq!(usize, 0);
|
||||
|
||||
macro_rules! impl_signed_abs_diff_eq {
|
||||
($T:ident, $default_epsilon:expr) => {
|
||||
impl AbsDiffEq for $T {
|
||||
type Epsilon = $T;
|
||||
|
||||
#[inline]
|
||||
fn default_epsilon() -> $T {
|
||||
$default_epsilon
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abs_diff_eq(&self, other: &$T, epsilon: $T) -> bool {
|
||||
$T::abs(self - other) <= epsilon
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_signed_abs_diff_eq!(i8, 0);
|
||||
impl_signed_abs_diff_eq!(i16, 0);
|
||||
impl_signed_abs_diff_eq!(i32, 0);
|
||||
impl_signed_abs_diff_eq!(i64, 0);
|
||||
impl_signed_abs_diff_eq!(isize, 0);
|
||||
impl_signed_abs_diff_eq!(f32, f32::EPSILON);
|
||||
impl_signed_abs_diff_eq!(f64, f64::EPSILON);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Derived implementations
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
impl<'a, T: AbsDiffEq + ?Sized> AbsDiffEq for &'a T {
|
||||
type Epsilon = T::Epsilon;
|
||||
|
||||
#[inline]
|
||||
fn default_epsilon() -> T::Epsilon {
|
||||
T::default_epsilon()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abs_diff_eq(&self, other: &&'a T, epsilon: T::Epsilon) -> bool {
|
||||
T::abs_diff_eq(*self, *other, epsilon)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: AbsDiffEq + ?Sized> AbsDiffEq for &'a mut T {
|
||||
type Epsilon = T::Epsilon;
|
||||
|
||||
#[inline]
|
||||
fn default_epsilon() -> T::Epsilon {
|
||||
T::default_epsilon()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abs_diff_eq(&self, other: &&'a mut T, epsilon: T::Epsilon) -> bool {
|
||||
T::abs_diff_eq(*self, *other, epsilon)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AbsDiffEq + Copy> AbsDiffEq for cell::Cell<T> {
|
||||
type Epsilon = T::Epsilon;
|
||||
|
||||
#[inline]
|
||||
fn default_epsilon() -> T::Epsilon {
|
||||
T::default_epsilon()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abs_diff_eq(&self, other: &cell::Cell<T>, epsilon: T::Epsilon) -> bool {
|
||||
T::abs_diff_eq(&self.get(), &other.get(), epsilon)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AbsDiffEq + ?Sized> AbsDiffEq for cell::RefCell<T> {
|
||||
type Epsilon = T::Epsilon;
|
||||
|
||||
#[inline]
|
||||
fn default_epsilon() -> T::Epsilon {
|
||||
T::default_epsilon()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abs_diff_eq(&self, other: &cell::RefCell<T>, epsilon: T::Epsilon) -> bool {
|
||||
T::abs_diff_eq(&self.borrow(), &other.borrow(), epsilon)
|
||||
}
|
||||
}
|
||||
|
||||
impl<A, B> AbsDiffEq<[B]> for [A]
|
||||
where
|
||||
A: AbsDiffEq<B>,
|
||||
A::Epsilon: Clone,
|
||||
{
|
||||
type Epsilon = A::Epsilon;
|
||||
|
||||
#[inline]
|
||||
fn default_epsilon() -> A::Epsilon {
|
||||
A::default_epsilon()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abs_diff_eq(&self, other: &[B], epsilon: A::Epsilon) -> bool {
|
||||
self.len() == other.len()
|
||||
&& Iterator::zip(self.iter(), other).all(|(x, y)| A::abs_diff_eq(x, y, epsilon.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "num-complex")]
|
||||
impl<T: AbsDiffEq> AbsDiffEq for Complex<T>
|
||||
where
|
||||
T::Epsilon: Clone,
|
||||
{
|
||||
type Epsilon = T::Epsilon;
|
||||
|
||||
#[inline]
|
||||
fn default_epsilon() -> T::Epsilon {
|
||||
T::default_epsilon()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abs_diff_eq(&self, other: &Complex<T>, epsilon: T::Epsilon) -> bool {
|
||||
T::abs_diff_eq(&self.re, &other.re, epsilon.clone())
|
||||
&& T::abs_diff_eq(&self.im, &other.im, epsilon.clone())
|
||||
}
|
||||
}
|
|
@ -0,0 +1,384 @@
|
|||
// Copyright 2015 Brendan Zabarauskas
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! A crate that provides facilities for testing the approximate equality of floating-point
|
||||
//! based types, using either relative difference, or units in the last place (ULPs)
|
||||
//! comparisons.
|
||||
//!
|
||||
//! You can also use the `approx_{eq, ne}!` `assert_approx_{eq, ne}!` macros to test for equality
|
||||
//! using a more positional style.
|
||||
//!
|
||||
//! ```rust
|
||||
//! #[macro_use]
|
||||
//! extern crate approx;
|
||||
//!
|
||||
//! use std::f64;
|
||||
//!
|
||||
//! # fn main() {
|
||||
//! abs_diff_eq!(1.0, 1.0);
|
||||
//! abs_diff_eq!(1.0, 1.0, epsilon = f64::EPSILON);
|
||||
//!
|
||||
//! relative_eq!(1.0, 1.0);
|
||||
//! relative_eq!(1.0, 1.0, epsilon = f64::EPSILON);
|
||||
//! relative_eq!(1.0, 1.0, max_relative = 1.0);
|
||||
//! relative_eq!(1.0, 1.0, epsilon = f64::EPSILON, max_relative = 1.0);
|
||||
//! relative_eq!(1.0, 1.0, max_relative = 1.0, epsilon = f64::EPSILON);
|
||||
//!
|
||||
//! ulps_eq!(1.0, 1.0);
|
||||
//! ulps_eq!(1.0, 1.0, epsilon = f64::EPSILON);
|
||||
//! ulps_eq!(1.0, 1.0, max_ulps = 4);
|
||||
//! ulps_eq!(1.0, 1.0, epsilon = f64::EPSILON, max_ulps = 4);
|
||||
//! ulps_eq!(1.0, 1.0, max_ulps = 4, epsilon = f64::EPSILON);
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! # Implementing approximate equality for custom types
|
||||
//!
|
||||
//! The `ApproxEq` trait allows approximate equalities to be implemented on types, based on the
|
||||
//! fundamental floating point implementations.
|
||||
//!
|
||||
//! For example, we might want to be able to do approximate assertions on a complex number type:
|
||||
//!
|
||||
//! ```rust
|
||||
//! #[macro_use]
|
||||
//! extern crate approx;
|
||||
//! # use approx::{AbsDiffEq, RelativeEq, UlpsEq};
|
||||
//!
|
||||
//! #[derive(Debug, PartialEq)]
|
||||
//! struct Complex<T> {
|
||||
//! x: T,
|
||||
//! i: T,
|
||||
//! }
|
||||
//! # impl<T: AbsDiffEq> AbsDiffEq for Complex<T> where T::Epsilon: Copy {
|
||||
//! # type Epsilon = T::Epsilon;
|
||||
//! # fn default_epsilon() -> T::Epsilon { T::default_epsilon() }
|
||||
//! # fn abs_diff_eq(&self, other: &Self, epsilon: T::Epsilon) -> bool {
|
||||
//! # T::abs_diff_eq(&self.x, &other.x, epsilon) &&
|
||||
//! # T::abs_diff_eq(&self.i, &other.i, epsilon)
|
||||
//! # }
|
||||
//! # }
|
||||
//! # impl<T: RelativeEq> RelativeEq for Complex<T> where T::Epsilon: Copy {
|
||||
//! # fn default_max_relative() -> T::Epsilon { T::default_max_relative() }
|
||||
//! # fn relative_eq(&self, other: &Self, epsilon: T::Epsilon, max_relative: T::Epsilon)
|
||||
//! # -> bool {
|
||||
//! # T::relative_eq(&self.x, &other.x, epsilon, max_relative) &&
|
||||
//! # T::relative_eq(&self.i, &other.i, epsilon, max_relative)
|
||||
//! # }
|
||||
//! # }
|
||||
//! # impl<T: UlpsEq> UlpsEq for Complex<T> where T::Epsilon: Copy {
|
||||
//! # fn default_max_ulps() -> u32 { T::default_max_ulps() }
|
||||
//! # fn ulps_eq(&self, other: &Self, epsilon: T::Epsilon, max_ulps: u32) -> bool {
|
||||
//! # T::ulps_eq(&self.x, &other.x, epsilon, max_ulps) &&
|
||||
//! # T::ulps_eq(&self.i, &other.i, epsilon, max_ulps)
|
||||
//! # }
|
||||
//! # }
|
||||
//!
|
||||
//! # fn main() {
|
||||
//! let x = Complex { x: 1.2, i: 2.3 };
|
||||
//!
|
||||
//! assert_relative_eq!(x, x);
|
||||
//! assert_ulps_eq!(x, x, max_ulps = 4);
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! To do this we can implement `AbsDiffEq`, `RelativeEq` and `UlpsEq` generically in terms of a
|
||||
//! type parameter that also implements `ApproxEq`, `RelativeEq` and `UlpsEq` respectively. This
|
||||
//! means that we can make comparisons for either `Complex<f32>` or `Complex<f64>`:
|
||||
//!
|
||||
//! ```rust
|
||||
//! # use approx::{AbsDiffEq, RelativeEq, UlpsEq};
|
||||
//! # #[derive(Debug, PartialEq)]
|
||||
//! # struct Complex<T> { x: T, i: T, }
|
||||
//! #
|
||||
//! impl<T: AbsDiffEq> AbsDiffEq for Complex<T> where
|
||||
//! T::Epsilon: Copy,
|
||||
//! {
|
||||
//! type Epsilon = T::Epsilon;
|
||||
//!
|
||||
//! fn default_epsilon() -> T::Epsilon {
|
||||
//! T::default_epsilon()
|
||||
//! }
|
||||
//!
|
||||
//! fn abs_diff_eq(&self, other: &Self, epsilon: T::Epsilon) -> bool {
|
||||
//! T::abs_diff_eq(&self.x, &other.x, epsilon) &&
|
||||
//! T::abs_diff_eq(&self.i, &other.i, epsilon)
|
||||
//! }
|
||||
//! }
|
||||
//!
|
||||
//! impl<T: RelativeEq> RelativeEq for Complex<T> where
|
||||
//! T::Epsilon: Copy,
|
||||
//! {
|
||||
//! fn default_max_relative() -> T::Epsilon {
|
||||
//! T::default_max_relative()
|
||||
//! }
|
||||
//!
|
||||
//! fn relative_eq(&self, other: &Self, epsilon: T::Epsilon, max_relative: T::Epsilon) -> bool {
|
||||
//! T::relative_eq(&self.x, &other.x, epsilon, max_relative) &&
|
||||
//! T::relative_eq(&self.i, &other.i, epsilon, max_relative)
|
||||
//! }
|
||||
//! }
|
||||
//!
|
||||
//! impl<T: UlpsEq> UlpsEq for Complex<T> where
|
||||
//! T::Epsilon: Copy,
|
||||
//! {
|
||||
//! fn default_max_ulps() -> u32 {
|
||||
//! T::default_max_ulps()
|
||||
//! }
|
||||
//!
|
||||
//! fn ulps_eq(&self, other: &Self, epsilon: T::Epsilon, max_ulps: u32) -> bool {
|
||||
//! T::ulps_eq(&self.x, &other.x, epsilon, max_ulps) &&
|
||||
//! T::ulps_eq(&self.i, &other.i, epsilon, max_ulps)
|
||||
//! }
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! # References
|
||||
//!
|
||||
//! Floating point is hard! Thanks goes to these links for helping to make things a _little_
|
||||
//! easier to understand:
|
||||
//!
|
||||
//! - [Comparing Floating Point Numbers, 2012 Edition]
|
||||
//! (https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/)
|
||||
//! - [The Floating Point Guide - Comparison](http://floating-point-gui.de/errors/comparison/)
|
||||
//! - [What Every Computer Scientist Should Know About Floating-Point Arithmetic]
|
||||
//! (https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html)
|
||||
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
|
||||
#[cfg(feature = "num-complex")]
|
||||
extern crate num_complex;
|
||||
extern crate num_traits;
|
||||
|
||||
#[cfg(not(feature = "std"))]
|
||||
use core as std;
|
||||
|
||||
mod abs_diff_eq;
|
||||
mod relative_eq;
|
||||
mod ulps_eq;
|
||||
|
||||
mod macros;
|
||||
|
||||
pub use abs_diff_eq::AbsDiffEq;
|
||||
pub use relative_eq::RelativeEq;
|
||||
pub use ulps_eq::UlpsEq;
|
||||
|
||||
/// The requisite parameters for testing for approximate equality using a
|
||||
/// absolute difference based comparison.
|
||||
///
|
||||
/// This is not normally used directly, rather via the
|
||||
/// `assert_abs_diff_{eq|ne}!` and `abs_diff_{eq|ne}!` macros.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use std::f64;
|
||||
/// use approx::AbsDiff;
|
||||
///
|
||||
/// AbsDiff::default().eq(&1.0, &1.0);
|
||||
/// AbsDiff::default().epsilon(f64::EPSILON).eq(&1.0, &1.0);
|
||||
/// ```
|
||||
pub struct AbsDiff<A, B = A>
|
||||
where
|
||||
A: AbsDiffEq<B> + ?Sized,
|
||||
B: ?Sized,
|
||||
{
|
||||
/// The tolerance to use when testing values that are close together.
|
||||
pub epsilon: A::Epsilon,
|
||||
}
|
||||
|
||||
impl<A, B> Default for AbsDiff<A, B>
|
||||
where
|
||||
A: AbsDiffEq<B> + ?Sized,
|
||||
B: ?Sized,
|
||||
{
|
||||
#[inline]
|
||||
fn default() -> AbsDiff<A, B> {
|
||||
AbsDiff {
|
||||
epsilon: A::default_epsilon(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<A, B> AbsDiff<A, B>
|
||||
where
|
||||
A: AbsDiffEq<B> + ?Sized,
|
||||
B: ?Sized,
|
||||
{
|
||||
/// Replace the epsilon value with the one specified.
|
||||
#[inline]
|
||||
pub fn epsilon(self, epsilon: A::Epsilon) -> AbsDiff<A, B> {
|
||||
AbsDiff { epsilon, ..self }
|
||||
}
|
||||
|
||||
/// Peform the equality comparison
|
||||
#[inline]
|
||||
pub fn eq(self, lhs: &A, rhs: &B) -> bool {
|
||||
A::abs_diff_eq(lhs, rhs, self.epsilon)
|
||||
}
|
||||
|
||||
/// Peform the inequality comparison
|
||||
#[inline]
|
||||
pub fn ne(self, lhs: &A, rhs: &B) -> bool {
|
||||
A::abs_diff_ne(lhs, rhs, self.epsilon)
|
||||
}
|
||||
}
|
||||
|
||||
/// The requisite parameters for testing for approximate equality using a
|
||||
/// relative based comparison.
|
||||
///
|
||||
/// This is not normally used directly, rather via the
|
||||
/// `assert_relative_{eq|ne}!` and `relative_{eq|ne}!` macros.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use std::f64;
|
||||
/// use approx::Relative;
|
||||
///
|
||||
/// Relative::default().eq(&1.0, &1.0);
|
||||
/// Relative::default().epsilon(f64::EPSILON).eq(&1.0, &1.0);
|
||||
/// Relative::default().max_relative(1.0).eq(&1.0, &1.0);
|
||||
/// Relative::default().epsilon(f64::EPSILON).max_relative(1.0).eq(&1.0, &1.0);
|
||||
/// Relative::default().max_relative(1.0).epsilon(f64::EPSILON).eq(&1.0, &1.0);
|
||||
/// ```
|
||||
pub struct Relative<A, B = A>
|
||||
where
|
||||
A: RelativeEq<B> + ?Sized,
|
||||
B: ?Sized,
|
||||
{
|
||||
/// The tolerance to use when testing values that are close together.
|
||||
pub epsilon: A::Epsilon,
|
||||
/// The relative tolerance for testing values that are far-apart.
|
||||
pub max_relative: A::Epsilon,
|
||||
}
|
||||
|
||||
impl<A, B> Default for Relative<A, B>
|
||||
where
|
||||
A: RelativeEq<B> + ?Sized,
|
||||
B: ?Sized,
|
||||
{
|
||||
#[inline]
|
||||
fn default() -> Relative<A, B> {
|
||||
Relative {
|
||||
epsilon: A::default_epsilon(),
|
||||
max_relative: A::default_max_relative(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<A, B> Relative<A, B>
|
||||
where
|
||||
A: RelativeEq<B> + ?Sized,
|
||||
B: ?Sized,
|
||||
{
|
||||
/// Replace the epsilon value with the one specified.
|
||||
#[inline]
|
||||
pub fn epsilon(self, epsilon: A::Epsilon) -> Relative<A, B> {
|
||||
Relative { epsilon, ..self }
|
||||
}
|
||||
|
||||
/// Replace the maximum relative value with the one specified.
|
||||
#[inline]
|
||||
pub fn max_relative(self, max_relative: A::Epsilon) -> Relative<A, B> {
|
||||
Relative {
|
||||
max_relative,
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
/// Peform the equality comparison
|
||||
#[inline]
|
||||
pub fn eq(self, lhs: &A, rhs: &B) -> bool {
|
||||
A::relative_eq(lhs, rhs, self.epsilon, self.max_relative)
|
||||
}
|
||||
|
||||
/// Peform the inequality comparison
|
||||
#[inline]
|
||||
pub fn ne(self, lhs: &A, rhs: &B) -> bool {
|
||||
A::relative_ne(lhs, rhs, self.epsilon, self.max_relative)
|
||||
}
|
||||
}
|
||||
|
||||
/// The requisite parameters for testing for approximate equality using an ULPs
|
||||
/// based comparison.
|
||||
///
|
||||
/// This is not normally used directly, rather via the `assert_ulps_{eq|ne}!`
|
||||
/// and `ulps_{eq|ne}!` macros.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use std::f64;
|
||||
/// use approx::Ulps;
|
||||
///
|
||||
/// Ulps::default().eq(&1.0, &1.0);
|
||||
/// Ulps::default().epsilon(f64::EPSILON).eq(&1.0, &1.0);
|
||||
/// Ulps::default().max_ulps(4).eq(&1.0, &1.0);
|
||||
/// Ulps::default().epsilon(f64::EPSILON).max_ulps(4).eq(&1.0, &1.0);
|
||||
/// Ulps::default().max_ulps(4).epsilon(f64::EPSILON).eq(&1.0, &1.0);
|
||||
/// ```
|
||||
pub struct Ulps<A, B = A>
|
||||
where
|
||||
A: UlpsEq<B> + ?Sized,
|
||||
B: ?Sized,
|
||||
{
|
||||
/// The tolerance to use when testing values that are close together.
|
||||
pub epsilon: A::Epsilon,
|
||||
/// The ULPs to tolerate when testing values that are far-apart.
|
||||
pub max_ulps: u32,
|
||||
}
|
||||
|
||||
impl<A, B> Default for Ulps<A, B>
|
||||
where
|
||||
A: UlpsEq<B> + ?Sized,
|
||||
B: ?Sized,
|
||||
{
|
||||
#[inline]
|
||||
fn default() -> Ulps<A, B> {
|
||||
Ulps {
|
||||
epsilon: A::default_epsilon(),
|
||||
max_ulps: A::default_max_ulps(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<A, B> Ulps<A, B>
|
||||
where
|
||||
A: UlpsEq<B> + ?Sized,
|
||||
B: ?Sized,
|
||||
{
|
||||
/// Replace the epsilon value with the one specified.
|
||||
#[inline]
|
||||
pub fn epsilon(self, epsilon: A::Epsilon) -> Ulps<A, B> {
|
||||
Ulps { epsilon, ..self }
|
||||
}
|
||||
|
||||
/// Replace the max ulps value with the one specified.
|
||||
#[inline]
|
||||
pub fn max_ulps(self, max_ulps: u32) -> Ulps<A, B> {
|
||||
Ulps { max_ulps, ..self }
|
||||
}
|
||||
|
||||
/// Peform the equality comparison
|
||||
#[inline]
|
||||
pub fn eq(self, lhs: &A, rhs: &B) -> bool {
|
||||
A::ulps_eq(lhs, rhs, self.epsilon, self.max_ulps)
|
||||
}
|
||||
|
||||
/// Peform the inequality comparison
|
||||
#[inline]
|
||||
pub fn ne(self, lhs: &A, rhs: &B) -> bool {
|
||||
A::ulps_ne(lhs, rhs, self.epsilon, self.max_ulps)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,187 @@
|
|||
// Copyright 2015 Brendan Zabarauskas
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/// Approximate equality of using the absolute difference.
#[macro_export]
macro_rules! abs_diff_eq {
    // Each macro has a second arm that only tolerates a trailing comma.
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*) => {
        $crate::AbsDiff::default()$(.$opt($val))*.eq(&$left, &$right)
    };
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*,) => {
        $crate::AbsDiff::default()$(.$opt($val))*.eq(&$left, &$right)
    };
}

/// Approximate inequality of using the absolute difference.
#[macro_export]
macro_rules! abs_diff_ne {
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*) => {
        $crate::AbsDiff::default()$(.$opt($val))*.ne(&$left, &$right)
    };
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*,) => {
        $crate::AbsDiff::default()$(.$opt($val))*.ne(&$left, &$right)
    };
}

/// Approximate equality using both the absolute difference and relative based comparisons.
#[macro_export]
macro_rules! relative_eq {
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*) => {
        $crate::Relative::default()$(.$opt($val))*.eq(&$left, &$right)
    };
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*,) => {
        $crate::Relative::default()$(.$opt($val))*.eq(&$left, &$right)
    };
}

/// Approximate inequality using both the absolute difference and relative based comparisons.
#[macro_export]
macro_rules! relative_ne {
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*) => {
        $crate::Relative::default()$(.$opt($val))*.ne(&$left, &$right)
    };
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*,) => {
        $crate::Relative::default()$(.$opt($val))*.ne(&$left, &$right)
    };
}

/// Approximate equality using both the absolute difference and ULPs (Units in Last Place).
#[macro_export]
macro_rules! ulps_eq {
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*) => {
        $crate::Ulps::default()$(.$opt($val))*.eq(&$left, &$right)
    };
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*,) => {
        $crate::Ulps::default()$(.$opt($val))*.eq(&$left, &$right)
    };
}

/// Approximate inequality using both the absolute difference and ULPs (Units in Last Place).
#[macro_export]
macro_rules! ulps_ne {
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*) => {
        $crate::Ulps::default()$(.$opt($val))*.ne(&$left, &$right)
    };
    ($left:expr, $right:expr $(, $opt:ident = $val:expr)*,) => {
        $crate::Ulps::default()$(.$opt($val))*.ne(&$left, &$right)
    };
}
|
||||
|
||||
// Shared implementation for the `assert_*` macros below: evaluates both sides
// once, delegates the comparison to the given comparison macro, and panics with
// a readable report on failure.
#[doc(hidden)]
#[macro_export]
macro_rules! __assert_approx {
    ($eq:ident, $given:expr, $expected:expr) => {{
        let (given, expected) = (&($given), &($expected));

        if !$eq!(*given, *expected) {
            panic!(
                "assert_{}!({}, {})

left = {:?}
right = {:?}

",
                stringify!($eq),
                stringify!($given),
                stringify!($expected),
                given, expected,
            );
        }
    }};
    ($eq:ident, $given:expr, $expected:expr, $($opt:ident = $val:expr),+) => {{
        let (given, expected) = (&($given), &($expected));

        if !$eq!(*given, *expected, $($opt = $val),+) {
            panic!(
                "assert_{}!({}, {}, {})

left = {:?}
right = {:?}

",
                stringify!($eq),
                stringify!($given),
                stringify!($expected),
                stringify!($($opt = $val),+),
                given, expected,
            );
        }
    }};
}
|
||||
|
||||
/// An assertion that delegates to `abs_diff_eq!`, and panics with a helpful error on failure.
|
||||
#[macro_export(local_inner_macros)]
|
||||
macro_rules! assert_abs_diff_eq {
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
|
||||
__assert_approx!(abs_diff_eq, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
|
||||
__assert_approx!(abs_diff_eq, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
}
|
||||
|
||||
/// An assertion that delegates to `abs_diff_ne!`, and panics with a helpful error on failure.
|
||||
#[macro_export(local_inner_macros)]
|
||||
macro_rules! assert_abs_diff_ne {
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
|
||||
__assert_approx!(abs_diff_ne, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
|
||||
__assert_approx!(abs_diff_ne, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
}
|
||||
|
||||
/// An assertion that delegates to `relative_eq!`, and panics with a helpful error on failure.
|
||||
#[macro_export(local_inner_macros)]
|
||||
macro_rules! assert_relative_eq {
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
|
||||
__assert_approx!(relative_eq, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
|
||||
__assert_approx!(relative_eq, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
}
|
||||
|
||||
/// An assertion that delegates to `relative_ne!`, and panics with a helpful error on failure.
|
||||
#[macro_export(local_inner_macros)]
|
||||
macro_rules! assert_relative_ne {
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
|
||||
__assert_approx!(relative_ne, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
|
||||
__assert_approx!(relative_ne, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
}
|
||||
|
||||
/// An assertion that delegates to `ulps_eq!`, and panics with a helpful error on failure.
|
||||
#[macro_export(local_inner_macros)]
|
||||
macro_rules! assert_ulps_eq {
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
|
||||
__assert_approx!(ulps_eq, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
|
||||
__assert_approx!(ulps_eq, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
}
|
||||
|
||||
/// An assertion that delegates to `ulps_ne!`, and panics with a helpful error on failure.
|
||||
#[macro_export(local_inner_macros)]
|
||||
macro_rules! assert_ulps_ne {
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
|
||||
__assert_approx!(ulps_ne, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
|
||||
__assert_approx!(ulps_ne, $given, $expected $(, $opt = $val)*)
|
||||
};
|
||||
}
|
|
@ -0,0 +1,196 @@
|
|||
#[cfg(feature = "num-complex")]
|
||||
use num_complex::Complex;
|
||||
#[cfg(not(feature = "std"))]
|
||||
use num_traits::float::FloatCore;
|
||||
use std::{cell, f32, f64};
|
||||
|
||||
use AbsDiffEq;
|
||||
|
||||
/// Equality comparisons between two numbers using both the absolute difference and
|
||||
/// relative based comparisons.
|
||||
pub trait RelativeEq<Rhs = Self>: AbsDiffEq<Rhs>
|
||||
where
|
||||
Rhs: ?Sized,
|
||||
{
|
||||
/// The default relative tolerance for testing values that are far-apart.
|
||||
///
|
||||
/// This is used when no `max_relative` value is supplied to the `relative_eq` macro.
|
||||
fn default_max_relative() -> Self::Epsilon;
|
||||
|
||||
/// A test for equality that uses a relative comparison if the values are far apart.
|
||||
fn relative_eq(
|
||||
&self,
|
||||
other: &Rhs,
|
||||
epsilon: Self::Epsilon,
|
||||
max_relative: Self::Epsilon,
|
||||
) -> bool;
|
||||
|
||||
/// The inverse of `ApproxEq::relative_eq`.
|
||||
fn relative_ne(
|
||||
&self,
|
||||
other: &Rhs,
|
||||
epsilon: Self::Epsilon,
|
||||
max_relative: Self::Epsilon,
|
||||
) -> bool {
|
||||
!Self::relative_eq(self, other, epsilon, max_relative)
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Base implementations
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Implementation based on: [Comparing Floating Point Numbers, 2012 Edition]
|
||||
// (https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/)
|
||||
macro_rules! impl_relative_eq {
|
||||
($T:ident, $U:ident) => {
|
||||
impl RelativeEq for $T {
|
||||
#[inline]
|
||||
fn default_max_relative() -> $T {
|
||||
$T::EPSILON
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn relative_eq(&self, other: &$T, epsilon: $T, max_relative: $T) -> bool {
|
||||
// Handle same infinities
|
||||
if self == other {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Handle remaining infinities
|
||||
if $T::is_infinite(*self) || $T::is_infinite(*other) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let abs_diff = $T::abs(self - other);
|
||||
|
||||
// For when the numbers are really close together
|
||||
if abs_diff <= epsilon {
|
||||
return true;
|
||||
}
|
||||
|
||||
let abs_self = $T::abs(*self);
|
||||
let abs_other = $T::abs(*other);
|
||||
|
||||
let largest = if abs_other > abs_self {
|
||||
abs_other
|
||||
} else {
|
||||
abs_self
|
||||
};
|
||||
|
||||
// Use a relative difference comparison
|
||||
abs_diff <= largest * max_relative
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_relative_eq!(f32, i32);
|
||||
impl_relative_eq!(f64, i64);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Derived implementations
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
impl<'a, T: RelativeEq + ?Sized> RelativeEq for &'a T {
|
||||
#[inline]
|
||||
fn default_max_relative() -> T::Epsilon {
|
||||
T::default_max_relative()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn relative_eq(&self, other: &&'a T, epsilon: T::Epsilon, max_relative: T::Epsilon) -> bool {
|
||||
T::relative_eq(*self, *other, epsilon, max_relative)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: RelativeEq + ?Sized> RelativeEq for &'a mut T {
|
||||
#[inline]
|
||||
fn default_max_relative() -> T::Epsilon {
|
||||
T::default_max_relative()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn relative_eq(
|
||||
&self,
|
||||
other: &&'a mut T,
|
||||
epsilon: T::Epsilon,
|
||||
max_relative: T::Epsilon,
|
||||
) -> bool {
|
||||
T::relative_eq(*self, *other, epsilon, max_relative)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RelativeEq + Copy> RelativeEq for cell::Cell<T> {
|
||||
#[inline]
|
||||
fn default_max_relative() -> T::Epsilon {
|
||||
T::default_max_relative()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn relative_eq(
|
||||
&self,
|
||||
other: &cell::Cell<T>,
|
||||
epsilon: T::Epsilon,
|
||||
max_relative: T::Epsilon,
|
||||
) -> bool {
|
||||
T::relative_eq(&self.get(), &other.get(), epsilon, max_relative)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RelativeEq + ?Sized> RelativeEq for cell::RefCell<T> {
|
||||
#[inline]
|
||||
fn default_max_relative() -> T::Epsilon {
|
||||
T::default_max_relative()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn relative_eq(
|
||||
&self,
|
||||
other: &cell::RefCell<T>,
|
||||
epsilon: T::Epsilon,
|
||||
max_relative: T::Epsilon,
|
||||
) -> bool {
|
||||
T::relative_eq(&self.borrow(), &other.borrow(), epsilon, max_relative)
|
||||
}
|
||||
}
|
||||
|
||||
impl<A, B> RelativeEq<[B]> for [A]
|
||||
where
|
||||
A: RelativeEq<B>,
|
||||
A::Epsilon: Clone,
|
||||
{
|
||||
#[inline]
|
||||
fn default_max_relative() -> A::Epsilon {
|
||||
A::default_max_relative()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn relative_eq(&self, other: &[B], epsilon: A::Epsilon, max_relative: A::Epsilon) -> bool {
|
||||
self.len() == other.len()
|
||||
&& Iterator::zip(self.iter(), other)
|
||||
.all(|(x, y)| A::relative_eq(x, y, epsilon.clone(), max_relative.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "num-complex")]
|
||||
impl<T: RelativeEq> RelativeEq for Complex<T>
|
||||
where
|
||||
T::Epsilon: Clone,
|
||||
{
|
||||
#[inline]
|
||||
fn default_max_relative() -> T::Epsilon {
|
||||
T::default_max_relative()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn relative_eq(
|
||||
&self,
|
||||
other: &Complex<T>,
|
||||
epsilon: T::Epsilon,
|
||||
max_relative: T::Epsilon,
|
||||
) -> bool {
|
||||
T::relative_eq(&self.re, &other.re, epsilon.clone(), max_relative.clone())
|
||||
&& T::relative_eq(&self.im, &other.im, epsilon.clone(), max_relative.clone())
|
||||
}
|
||||
}
|
|
@ -0,0 +1,153 @@
|
|||
#[cfg(feature = "num-complex")]
|
||||
use num_complex::Complex;
|
||||
#[cfg(not(feature = "std"))]
|
||||
use num_traits::float::FloatCore;
|
||||
use std::{cell, mem};
|
||||
|
||||
use AbsDiffEq;
|
||||
|
||||
/// Equality comparisons between two numbers using both the absolute difference and ULPs
|
||||
/// (Units in Last Place) based comparisons.
|
||||
pub trait UlpsEq<Rhs = Self>: AbsDiffEq<Rhs>
|
||||
where
|
||||
Rhs: ?Sized,
|
||||
{
|
||||
/// The default ULPs to tolerate when testing values that are far-apart.
|
||||
///
|
||||
/// This is used when no `max_ulps` value is supplied to the `ulps_eq` macro.
|
||||
fn default_max_ulps() -> u32;
|
||||
|
||||
/// A test for equality that uses units in the last place (ULP) if the values are far apart.
|
||||
fn ulps_eq(&self, other: &Rhs, epsilon: Self::Epsilon, max_ulps: u32) -> bool;
|
||||
|
||||
/// The inverse of `ApproxEq::ulps_eq`.
|
||||
fn ulps_ne(&self, other: &Rhs, epsilon: Self::Epsilon, max_ulps: u32) -> bool {
|
||||
!Self::ulps_eq(self, other, epsilon, max_ulps)
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Base implementations
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Implementation based on: [Comparing Floating Point Numbers, 2012 Edition]
|
||||
// (https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/)
|
||||
macro_rules! impl_ulps_eq {
|
||||
($T:ident, $U:ident) => {
|
||||
impl UlpsEq for $T {
|
||||
#[inline]
|
||||
fn default_max_ulps() -> u32 {
|
||||
4
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ulps_eq(&self, other: &$T, epsilon: $T, max_ulps: u32) -> bool {
|
||||
// For when the numbers are really close together
|
||||
if $T::abs_diff_eq(self, other, epsilon) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Trivial negative sign check
|
||||
if self.signum() != other.signum() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// ULPS difference comparison
|
||||
let int_self: $U = unsafe { mem::transmute(*self) };
|
||||
let int_other: $U = unsafe { mem::transmute(*other) };
|
||||
|
||||
$U::abs(int_self - int_other) <= max_ulps as $U
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_ulps_eq!(f32, i32);
|
||||
impl_ulps_eq!(f64, i64);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Derived implementations
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
impl<'a, T: UlpsEq + ?Sized> UlpsEq for &'a T {
|
||||
#[inline]
|
||||
fn default_max_ulps() -> u32 {
|
||||
T::default_max_ulps()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ulps_eq(&self, other: &&'a T, epsilon: T::Epsilon, max_ulps: u32) -> bool {
|
||||
T::ulps_eq(*self, *other, epsilon, max_ulps)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: UlpsEq + ?Sized> UlpsEq for &'a mut T {
|
||||
#[inline]
|
||||
fn default_max_ulps() -> u32 {
|
||||
T::default_max_ulps()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ulps_eq(&self, other: &&'a mut T, epsilon: T::Epsilon, max_ulps: u32) -> bool {
|
||||
T::ulps_eq(*self, *other, epsilon, max_ulps)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: UlpsEq + Copy> UlpsEq for cell::Cell<T> {
|
||||
#[inline]
|
||||
fn default_max_ulps() -> u32 {
|
||||
T::default_max_ulps()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ulps_eq(&self, other: &cell::Cell<T>, epsilon: T::Epsilon, max_ulps: u32) -> bool {
|
||||
T::ulps_eq(&self.get(), &other.get(), epsilon, max_ulps)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: UlpsEq + ?Sized> UlpsEq for cell::RefCell<T> {
|
||||
#[inline]
|
||||
fn default_max_ulps() -> u32 {
|
||||
T::default_max_ulps()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ulps_eq(&self, other: &cell::RefCell<T>, epsilon: T::Epsilon, max_ulps: u32) -> bool {
|
||||
T::ulps_eq(&self.borrow(), &other.borrow(), epsilon, max_ulps)
|
||||
}
|
||||
}
|
||||
|
||||
impl<A, B> UlpsEq<[B]> for [A]
|
||||
where
|
||||
A: UlpsEq<B>,
|
||||
A::Epsilon: Clone,
|
||||
{
|
||||
#[inline]
|
||||
fn default_max_ulps() -> u32 {
|
||||
A::default_max_ulps()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ulps_eq(&self, other: &[B], epsilon: A::Epsilon, max_ulps: u32) -> bool {
|
||||
self.len() == other.len()
|
||||
&& Iterator::zip(self.iter(), other)
|
||||
.all(|(x, y)| A::ulps_eq(x, y, epsilon.clone(), max_ulps.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "num-complex")]
|
||||
impl<T: UlpsEq> UlpsEq for Complex<T>
|
||||
where
|
||||
T::Epsilon: Clone,
|
||||
{
|
||||
#[inline]
|
||||
fn default_max_ulps() -> u32 {
|
||||
T::default_max_ulps()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ulps_eq(&self, other: &Complex<T>, epsilon: T::Epsilon, max_ulps: u32) -> bool {
|
||||
T::ulps_eq(&self.re, &other.re, epsilon.clone(), max_ulps)
|
||||
&& T::ulps_eq(&self.im, &other.im, epsilon.clone(), max_ulps)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,442 @@
|
|||
// Copyright 2015 Brendan Zabarauskas
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Test cases derived from:
|
||||
// https://github.com/Pybonacci/puntoflotante.org/blob/master/content/errors/NearlyEqualsTest.java
|
||||
|
||||
#[macro_use]
|
||||
extern crate approx;
|
||||
|
||||
// NOTE(review): added `#[cfg(test)]` for consistency with the sibling
// `mod test_f64` below; the assertions themselves are unchanged.
#[cfg(test)]
mod test_f32 {
    use std::f32;

    #[test]
    fn test_basic() {
        assert_abs_diff_eq!(1.0f32, 1.0f32);
        assert_abs_diff_ne!(1.0f32, 2.0f32);
    }

    #[test]
    #[should_panic]
    fn test_basic_panic_eq() {
        assert_abs_diff_eq!(1.0f32, 2.0f32);
    }

    #[test]
    #[should_panic]
    fn test_basic_panic_ne() {
        assert_abs_diff_ne!(1.0f32, 1.0f32);
    }

    #[test]
    fn test_big() {
        assert_abs_diff_eq!(100000000.0f32, 100000001.0f32);
        assert_abs_diff_eq!(100000001.0f32, 100000000.0f32);
        assert_abs_diff_ne!(10000.0f32, 10001.0f32);
        assert_abs_diff_ne!(10001.0f32, 10000.0f32);
    }

    #[test]
    fn test_big_neg() {
        assert_abs_diff_eq!(-100000000.0f32, -100000001.0f32);
        assert_abs_diff_eq!(-100000001.0f32, -100000000.0f32);
        assert_abs_diff_ne!(-10000.0f32, -10001.0f32);
        assert_abs_diff_ne!(-10001.0f32, -10000.0f32);
    }

    #[test]
    fn test_mid() {
        assert_abs_diff_eq!(1.0000001f32, 1.0000002f32);
        assert_abs_diff_eq!(1.0000002f32, 1.0000001f32);
        assert_abs_diff_ne!(1.000001f32, 1.000002f32);
        assert_abs_diff_ne!(1.000002f32, 1.000001f32);
    }

    #[test]
    fn test_mid_neg() {
        assert_abs_diff_eq!(-1.0000001f32, -1.0000002f32);
        assert_abs_diff_eq!(-1.0000002f32, -1.0000001f32);
        assert_abs_diff_ne!(-1.000001f32, -1.000002f32);
        assert_abs_diff_ne!(-1.000002f32, -1.000001f32);
    }

    #[test]
    fn test_small() {
        assert_abs_diff_eq!(0.000010001f32, 0.000010002f32);
        assert_abs_diff_eq!(0.000010002f32, 0.000010001f32);
        assert_abs_diff_ne!(0.000001002f32, 0.0000001001f32);
        assert_abs_diff_ne!(0.000001001f32, 0.0000001002f32);
    }

    #[test]
    fn test_small_neg() {
        assert_abs_diff_eq!(-0.000010001f32, -0.000010002f32);
        assert_abs_diff_eq!(-0.000010002f32, -0.000010001f32);
        assert_abs_diff_ne!(-0.000001002f32, -0.0000001001f32);
        assert_abs_diff_ne!(-0.000001001f32, -0.0000001002f32);
    }

    #[test]
    fn test_zero() {
        assert_abs_diff_eq!(0.0f32, 0.0f32);
        assert_abs_diff_eq!(0.0f32, -0.0f32);
        assert_abs_diff_eq!(-0.0f32, -0.0f32);

        assert_abs_diff_ne!(0.000001f32, 0.0f32);
        assert_abs_diff_ne!(0.0f32, 0.000001f32);
        assert_abs_diff_ne!(-0.000001f32, 0.0f32);
        assert_abs_diff_ne!(0.0f32, -0.000001f32);
    }

    #[test]
    fn test_epsilon() {
        assert_abs_diff_eq!(0.0f32, 1e-40f32, epsilon = 1e-40f32);
        assert_abs_diff_eq!(1e-40f32, 0.0f32, epsilon = 1e-40f32);
        assert_abs_diff_eq!(0.0f32, -1e-40f32, epsilon = 1e-40f32);
        assert_abs_diff_eq!(-1e-40f32, 0.0f32, epsilon = 1e-40f32);

        assert_abs_diff_ne!(1e-40f32, 0.0f32, epsilon = 1e-41f32);
        assert_abs_diff_ne!(0.0f32, 1e-40f32, epsilon = 1e-41f32);
        assert_abs_diff_ne!(-1e-40f32, 0.0f32, epsilon = 1e-41f32);
        assert_abs_diff_ne!(0.0f32, -1e-40f32, epsilon = 1e-41f32);
    }

    #[test]
    fn test_max() {
        assert_abs_diff_eq!(f32::MAX, f32::MAX);
        assert_abs_diff_ne!(f32::MAX, -f32::MAX);
        assert_abs_diff_ne!(-f32::MAX, f32::MAX);
        assert_abs_diff_ne!(f32::MAX, f32::MAX / 2.0);
        assert_abs_diff_ne!(f32::MAX, -f32::MAX / 2.0);
        assert_abs_diff_ne!(-f32::MAX, f32::MAX / 2.0);
    }

    // NOTE: abs_diff_eq fails as numbers begin to get very large

    // #[test]
    // fn test_infinity() {
    //     assert_abs_diff_eq!(f32::INFINITY, f32::INFINITY);
    //     assert_abs_diff_eq!(f32::NEG_INFINITY, f32::NEG_INFINITY);
    //     assert_abs_diff_ne!(f32::NEG_INFINITY, f32::INFINITY);
    //     assert_abs_diff_eq!(f32::INFINITY, f32::MAX);
    //     assert_abs_diff_eq!(f32::NEG_INFINITY, -f32::MAX);
    // }

    #[test]
    fn test_nan() {
        assert_abs_diff_ne!(f32::NAN, f32::NAN);

        assert_abs_diff_ne!(f32::NAN, 0.0);
        assert_abs_diff_ne!(-0.0, f32::NAN);
        assert_abs_diff_ne!(f32::NAN, -0.0);
        assert_abs_diff_ne!(0.0, f32::NAN);

        assert_abs_diff_ne!(f32::NAN, f32::INFINITY);
        assert_abs_diff_ne!(f32::INFINITY, f32::NAN);
        assert_abs_diff_ne!(f32::NAN, f32::NEG_INFINITY);
        assert_abs_diff_ne!(f32::NEG_INFINITY, f32::NAN);

        assert_abs_diff_ne!(f32::NAN, f32::MAX);
        assert_abs_diff_ne!(f32::MAX, f32::NAN);
        assert_abs_diff_ne!(f32::NAN, -f32::MAX);
        assert_abs_diff_ne!(-f32::MAX, f32::NAN);

        assert_abs_diff_ne!(f32::NAN, f32::MIN_POSITIVE);
        assert_abs_diff_ne!(f32::MIN_POSITIVE, f32::NAN);
        assert_abs_diff_ne!(f32::NAN, -f32::MIN_POSITIVE);
        assert_abs_diff_ne!(-f32::MIN_POSITIVE, f32::NAN);
    }

    #[test]
    fn test_opposite_signs() {
        assert_abs_diff_ne!(1.000000001f32, -1.0f32);
        assert_abs_diff_ne!(-1.0f32, 1.000000001f32);
        assert_abs_diff_ne!(-1.000000001f32, 1.0f32);
        assert_abs_diff_ne!(1.0f32, -1.000000001f32);

        assert_abs_diff_eq!(10.0 * f32::MIN_POSITIVE, 10.0 * -f32::MIN_POSITIVE);
    }

    #[test]
    fn test_close_to_zero() {
        assert_abs_diff_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE);
        assert_abs_diff_eq!(f32::MIN_POSITIVE, -f32::MIN_POSITIVE);
        assert_abs_diff_eq!(-f32::MIN_POSITIVE, f32::MIN_POSITIVE);

        assert_abs_diff_eq!(f32::MIN_POSITIVE, 0.0f32);
        assert_abs_diff_eq!(0.0f32, f32::MIN_POSITIVE);
        assert_abs_diff_eq!(-f32::MIN_POSITIVE, 0.0f32);
        assert_abs_diff_eq!(0.0f32, -f32::MIN_POSITIVE);

        assert_abs_diff_ne!(0.000001f32, -f32::MIN_POSITIVE);
        assert_abs_diff_ne!(0.000001f32, f32::MIN_POSITIVE);
        assert_abs_diff_ne!(f32::MIN_POSITIVE, 0.000001f32);
        assert_abs_diff_ne!(-f32::MIN_POSITIVE, 0.000001f32);
    }
}
|
||||
|
||||
// f64 analogues of the f32 suite above.
#[cfg(test)]
mod test_f64 {
    use std::f64;

    #[test]
    fn test_basic() {
        assert_abs_diff_eq!(1.0f64, 1.0f64);
        assert_abs_diff_ne!(1.0f64, 2.0f64);
    }

    #[test]
    #[should_panic]
    fn test_basic_panic_eq() {
        assert_abs_diff_eq!(1.0f64, 2.0f64);
    }

    #[test]
    #[should_panic]
    fn test_basic_panic_ne() {
        assert_abs_diff_ne!(1.0f64, 1.0f64);
    }

    #[test]
    fn test_big() {
        assert_abs_diff_eq!(10000000000000000.0f64, 10000000000000001.0f64);
        assert_abs_diff_eq!(10000000000000001.0f64, 10000000000000000.0f64);
        assert_abs_diff_ne!(1000000000000000.0f64, 1000000000000001.0f64);
        assert_abs_diff_ne!(1000000000000001.0f64, 1000000000000000.0f64);
    }

    #[test]
    fn test_big_neg() {
        assert_abs_diff_eq!(-10000000000000000.0f64, -10000000000000001.0f64);
        assert_abs_diff_eq!(-10000000000000001.0f64, -10000000000000000.0f64);
        assert_abs_diff_ne!(-1000000000000000.0f64, -1000000000000001.0f64);
        assert_abs_diff_ne!(-1000000000000001.0f64, -1000000000000000.0f64);
    }

    #[test]
    fn test_mid() {
        assert_abs_diff_eq!(1.0000000000000001f64, 1.0000000000000002f64);
        assert_abs_diff_eq!(1.0000000000000002f64, 1.0000000000000001f64);
        assert_abs_diff_ne!(1.000000000000001f64, 1.000000000000002f64);
        assert_abs_diff_ne!(1.000000000000002f64, 1.000000000000001f64);
    }

    #[test]
    fn test_mid_neg() {
        assert_abs_diff_eq!(-1.0000000000000001f64, -1.0000000000000002f64);
        assert_abs_diff_eq!(-1.0000000000000002f64, -1.0000000000000001f64);
        assert_abs_diff_ne!(-1.000000000000001f64, -1.000000000000002f64);
        assert_abs_diff_ne!(-1.000000000000002f64, -1.000000000000001f64);
    }

    #[test]
    fn test_small() {
        assert_abs_diff_eq!(0.0000000100000001f64, 0.0000000100000002f64);
        assert_abs_diff_eq!(0.0000000100000002f64, 0.0000000100000001f64);
        assert_abs_diff_ne!(0.0000000100000001f64, 0.0000000010000002f64);
        assert_abs_diff_ne!(0.0000000100000002f64, 0.0000000010000001f64);
    }

    #[test]
    fn test_small_neg() {
        assert_abs_diff_eq!(-0.0000000100000001f64, -0.0000000100000002f64);
        assert_abs_diff_eq!(-0.0000000100000002f64, -0.0000000100000001f64);
        assert_abs_diff_ne!(-0.0000000100000001f64, -0.0000000010000002f64);
        assert_abs_diff_ne!(-0.0000000100000002f64, -0.0000000010000001f64);
    }

    #[test]
    fn test_zero() {
        assert_abs_diff_eq!(0.0f64, 0.0f64);
        assert_abs_diff_eq!(0.0f64, -0.0f64);
        assert_abs_diff_eq!(-0.0f64, -0.0f64);

        assert_abs_diff_ne!(0.000000000000001f64, 0.0f64);
        assert_abs_diff_ne!(0.0f64, 0.000000000000001f64);
        assert_abs_diff_ne!(-0.000000000000001f64, 0.0f64);
        assert_abs_diff_ne!(0.0f64, -0.000000000000001f64);
    }

    #[test]
    fn test_epsilon() {
        assert_abs_diff_eq!(0.0f64, 1e-40f64, epsilon = 1e-40f64);
        assert_abs_diff_eq!(1e-40f64, 0.0f64, epsilon = 1e-40f64);
        assert_abs_diff_eq!(0.0f64, -1e-40f64, epsilon = 1e-40f64);
        assert_abs_diff_eq!(-1e-40f64, 0.0f64, epsilon = 1e-40f64);

        assert_abs_diff_ne!(1e-40f64, 0.0f64, epsilon = 1e-41f64);
        assert_abs_diff_ne!(0.0f64, 1e-40f64, epsilon = 1e-41f64);
        assert_abs_diff_ne!(-1e-40f64, 0.0f64, epsilon = 1e-41f64);
        assert_abs_diff_ne!(0.0f64, -1e-40f64, epsilon = 1e-41f64);
    }

    #[test]
    fn test_max() {
        assert_abs_diff_eq!(f64::MAX, f64::MAX);
        assert_abs_diff_ne!(f64::MAX, -f64::MAX);
        assert_abs_diff_ne!(-f64::MAX, f64::MAX);
        assert_abs_diff_ne!(f64::MAX, f64::MAX / 2.0);
        assert_abs_diff_ne!(f64::MAX, -f64::MAX / 2.0);
        assert_abs_diff_ne!(-f64::MAX, f64::MAX / 2.0);
    }

    // NOTE: abs_diff_eq fails as numbers begin to get very large

    // #[test]
    // fn test_infinity() {
    //     assert_abs_diff_eq!(f64::INFINITY, f64::INFINITY);
    //     assert_abs_diff_eq!(f64::NEG_INFINITY, f64::NEG_INFINITY);
    //     assert_abs_diff_ne!(f64::NEG_INFINITY, f64::INFINITY);
    //     assert_abs_diff_eq!(f64::INFINITY, f64::MAX);
    //     assert_abs_diff_eq!(f64::NEG_INFINITY, -f64::MAX);
    // }

    #[test]
    fn test_nan() {
        assert_abs_diff_ne!(f64::NAN, f64::NAN);

        assert_abs_diff_ne!(f64::NAN, 0.0);
        assert_abs_diff_ne!(-0.0, f64::NAN);
        assert_abs_diff_ne!(f64::NAN, -0.0);
        assert_abs_diff_ne!(0.0, f64::NAN);

        assert_abs_diff_ne!(f64::NAN, f64::INFINITY);
        assert_abs_diff_ne!(f64::INFINITY, f64::NAN);
        assert_abs_diff_ne!(f64::NAN, f64::NEG_INFINITY);
        assert_abs_diff_ne!(f64::NEG_INFINITY, f64::NAN);

        assert_abs_diff_ne!(f64::NAN, f64::MAX);
        assert_abs_diff_ne!(f64::MAX, f64::NAN);
        assert_abs_diff_ne!(f64::NAN, -f64::MAX);
        assert_abs_diff_ne!(-f64::MAX, f64::NAN);

        assert_abs_diff_ne!(f64::NAN, f64::MIN_POSITIVE);
        assert_abs_diff_ne!(f64::MIN_POSITIVE, f64::NAN);
        assert_abs_diff_ne!(f64::NAN, -f64::MIN_POSITIVE);
        assert_abs_diff_ne!(-f64::MIN_POSITIVE, f64::NAN);
    }

    #[test]
    fn test_opposite_signs() {
        assert_abs_diff_ne!(1.000000001f64, -1.0f64);
        assert_abs_diff_ne!(-1.0f64, 1.000000001f64);
        assert_abs_diff_ne!(-1.000000001f64, 1.0f64);
        assert_abs_diff_ne!(1.0f64, -1.000000001f64);

        assert_abs_diff_eq!(10.0 * f64::MIN_POSITIVE, 10.0 * -f64::MIN_POSITIVE);
    }

    #[test]
    fn test_close_to_zero() {
        assert_abs_diff_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE);
        assert_abs_diff_eq!(f64::MIN_POSITIVE, -f64::MIN_POSITIVE);
        assert_abs_diff_eq!(-f64::MIN_POSITIVE, f64::MIN_POSITIVE);

        assert_abs_diff_eq!(f64::MIN_POSITIVE, 0.0f64);
        assert_abs_diff_eq!(0.0f64, f64::MIN_POSITIVE);
        assert_abs_diff_eq!(-f64::MIN_POSITIVE, 0.0f64);
        assert_abs_diff_eq!(0.0f64, -f64::MIN_POSITIVE);

        assert_abs_diff_ne!(0.000000000000001f64, -f64::MIN_POSITIVE);
        assert_abs_diff_ne!(0.000000000000001f64, f64::MIN_POSITIVE);
        assert_abs_diff_ne!(f64::MIN_POSITIVE, 0.000000000000001f64);
        assert_abs_diff_ne!(-f64::MIN_POSITIVE, 0.000000000000001f64);
    }
}
|
||||
|
||||
// The macros must also accept references on both sides.
mod test_ref {
    mod test_f32 {
        #[test]
        fn test_basic() {
            assert_abs_diff_eq!(&1.0f32, &1.0f32);
            assert_abs_diff_ne!(&1.0f32, &2.0f32);
        }
    }

    mod test_f64 {
        #[test]
        fn test_basic() {
            assert_abs_diff_eq!(&1.0f64, &1.0f64);
            assert_abs_diff_ne!(&1.0f64, &2.0f64);
        }
    }
}
|
||||
|
||||
// The macros must also accept unsized slices on both sides.
mod test_slice {
    mod test_f32 {
        #[test]
        fn test_basic() {
            assert_abs_diff_eq!([1.0f32, 2.0f32][..], [1.0f32, 2.0f32][..]);
            assert_abs_diff_ne!([1.0f32, 2.0f32][..], [2.0f32, 1.0f32][..]);
        }
    }

    mod test_f64 {
        #[test]
        fn test_basic() {
            assert_abs_diff_eq!([1.0f64, 2.0f64][..], [1.0f64, 2.0f64][..]);
            assert_abs_diff_ne!([1.0f64, 2.0f64][..], [2.0f64, 1.0f64][..]);
        }
    }
}
|
||||
|
||||
#[cfg(feature = "num-complex")]
|
||||
mod test_complex {
|
||||
extern crate num_complex;
|
||||
pub use self::num_complex::Complex;
|
||||
|
||||
mod test_f32 {
|
||||
use super::Complex;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_abs_diff_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
|
||||
assert_abs_diff_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_eq() {
|
||||
assert_abs_diff_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_ne() {
|
||||
assert_abs_diff_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
|
||||
}
|
||||
}
|
||||
|
||||
mod test_f64 {
|
||||
use super::Complex;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_abs_diff_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
|
||||
assert_abs_diff_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_eq() {
|
||||
assert_abs_diff_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_ne() {
|
||||
assert_abs_diff_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
extern crate approx;
|
||||
|
||||
mod test_macro_import {
|
||||
use approx::{
|
||||
assert_abs_diff_eq, assert_abs_diff_ne, assert_relative_eq, assert_relative_ne,
|
||||
assert_ulps_eq, assert_ulps_ne,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test() {
|
||||
assert_abs_diff_eq!(1.0f32, 1.0f32);
|
||||
assert_abs_diff_ne!(1.0f32, 2.0f32);
|
||||
assert_relative_eq!(1.0f32, 1.0f32);
|
||||
assert_relative_ne!(1.0f32, 2.0f32);
|
||||
assert_ulps_eq!(1.0f32, 1.0f32);
|
||||
assert_ulps_ne!(1.0f32, 2.0f32);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
// Copyright 2015 Brendan Zabarauskas
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Macro instantiation tests
|
||||
|
||||
#[macro_use]
|
||||
extern crate approx;
|
||||
|
||||
#[test]
|
||||
fn test_abs_diff_eq() {
|
||||
let _: bool = abs_diff_eq!(1.0, 1.0);
|
||||
let _: bool = abs_diff_eq!(1.0, 1.0, epsilon = 1.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_abs_diff_eq_trailing_commas() {
|
||||
let _: bool = abs_diff_eq!(1.0, 1.0,);
|
||||
let _: bool = abs_diff_eq!(1.0, 1.0, epsilon = 1.0,);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_abs_diff_ne() {
|
||||
let _: bool = abs_diff_ne!(1.0, 1.0);
|
||||
let _: bool = abs_diff_ne!(1.0, 1.0, epsilon = 1.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_abs_diff_ne_trailing_commas() {
|
||||
let _: bool = abs_diff_ne!(1.0, 1.0,);
|
||||
let _: bool = abs_diff_ne!(1.0, 1.0, epsilon = 1.0,);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_relative_eq() {
|
||||
let _: bool = relative_eq!(1.0, 1.0);
|
||||
let _: bool = relative_eq!(1.0, 1.0, epsilon = 1.0);
|
||||
let _: bool = relative_eq!(1.0, 1.0, max_relative = 1.0);
|
||||
let _: bool = relative_eq!(1.0, 1.0, epsilon = 1.0, max_relative = 1.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_relative_eq_trailing_commas() {
|
||||
let _: bool = relative_eq!(1.0, 1.0,);
|
||||
let _: bool = relative_eq!(1.0, 1.0, epsilon = 1.0, max_relative = 1.0,);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_relative_ne() {
|
||||
let _: bool = relative_ne!(1.0, 1.0);
|
||||
let _: bool = relative_ne!(1.0, 1.0, epsilon = 1.0);
|
||||
let _: bool = relative_ne!(1.0, 1.0, max_relative = 1.0);
|
||||
let _: bool = relative_ne!(1.0, 1.0, epsilon = 1.0, max_relative = 1.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_relative_ne_trailing_commas() {
|
||||
let _: bool = relative_ne!(1.0, 1.0,);
|
||||
let _: bool = relative_ne!(1.0, 1.0, epsilon = 1.0, max_relative = 1.0,);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ulps_eq() {
|
||||
let _: bool = ulps_eq!(1.0, 1.0);
|
||||
let _: bool = ulps_eq!(1.0, 1.0, epsilon = 1.0);
|
||||
let _: bool = ulps_eq!(1.0, 1.0, max_ulps = 1);
|
||||
let _: bool = ulps_eq!(1.0, 1.0, epsilon = 1.0, max_ulps = 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ulps_eq_trailing_commas() {
|
||||
let _: bool = ulps_eq!(1.0, 1.0,);
|
||||
let _: bool = ulps_eq!(1.0, 1.0, epsilon = 1.0, max_ulps = 1,);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ulps_ne() {
|
||||
let _: bool = ulps_ne!(1.0, 1.0);
|
||||
let _: bool = ulps_ne!(1.0, 1.0, epsilon = 1.0);
|
||||
let _: bool = ulps_ne!(1.0, 1.0, max_ulps = 1);
|
||||
let _: bool = ulps_ne!(1.0, 1.0, epsilon = 1.0, max_ulps = 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ulps_ne_trailing_commas() {
|
||||
let _: bool = ulps_ne!(1.0, 1.0,);
|
||||
let _: bool = ulps_ne!(1.0, 1.0, epsilon = 1.0, max_ulps = 1,);
|
||||
}
|
|
@ -0,0 +1,440 @@
|
|||
// Copyright 2015 Brendan Zabarauskas
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Test cases derived from:
|
||||
// https://github.com/Pybonacci/puntoflotante.org/blob/master/content/errors/NearlyEqualsTest.java
|
||||
|
||||
#[macro_use]
|
||||
extern crate approx;
|
||||
|
||||
mod test_f32 {
|
||||
use std::f32;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_relative_eq!(1.0f32, 1.0f32);
|
||||
assert_relative_ne!(1.0f32, 2.0f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_eq() {
|
||||
assert_relative_eq!(1.0f32, 2.0f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_ne() {
|
||||
assert_relative_ne!(1.0f32, 1.0f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_big() {
|
||||
assert_relative_eq!(100000000.0f32, 100000001.0f32);
|
||||
assert_relative_eq!(100000001.0f32, 100000000.0f32);
|
||||
assert_relative_ne!(10000.0f32, 10001.0f32);
|
||||
assert_relative_ne!(10001.0f32, 10000.0f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_big_neg() {
|
||||
assert_relative_eq!(-100000000.0f32, -100000001.0f32);
|
||||
assert_relative_eq!(-100000001.0f32, -100000000.0f32);
|
||||
assert_relative_ne!(-10000.0f32, -10001.0f32);
|
||||
assert_relative_ne!(-10001.0f32, -10000.0f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mid() {
|
||||
assert_relative_eq!(1.0000001f32, 1.0000002f32);
|
||||
assert_relative_eq!(1.0000002f32, 1.0000001f32);
|
||||
assert_relative_ne!(1.000001f32, 1.000002f32);
|
||||
assert_relative_ne!(1.000002f32, 1.000001f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mid_neg() {
|
||||
assert_relative_eq!(-1.0000001f32, -1.0000002f32);
|
||||
assert_relative_eq!(-1.0000002f32, -1.0000001f32);
|
||||
assert_relative_ne!(-1.000001f32, -1.000002f32);
|
||||
assert_relative_ne!(-1.000002f32, -1.000001f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_small() {
|
||||
assert_relative_eq!(0.000010001f32, 0.000010002f32);
|
||||
assert_relative_eq!(0.000010002f32, 0.000010001f32);
|
||||
assert_relative_ne!(0.000001002f32, 0.0000001001f32);
|
||||
assert_relative_ne!(0.000001001f32, 0.0000001002f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_small_neg() {
|
||||
assert_relative_eq!(-0.000010001f32, -0.000010002f32);
|
||||
assert_relative_eq!(-0.000010002f32, -0.000010001f32);
|
||||
assert_relative_ne!(-0.000001002f32, -0.0000001001f32);
|
||||
assert_relative_ne!(-0.000001001f32, -0.0000001002f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_zero() {
|
||||
assert_relative_eq!(0.0f32, 0.0f32);
|
||||
assert_relative_eq!(0.0f32, -0.0f32);
|
||||
assert_relative_eq!(-0.0f32, -0.0f32);
|
||||
|
||||
assert_relative_ne!(0.000001f32, 0.0f32);
|
||||
assert_relative_ne!(0.0f32, 0.000001f32);
|
||||
assert_relative_ne!(-0.000001f32, 0.0f32);
|
||||
assert_relative_ne!(0.0f32, -0.000001f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_epsilon() {
|
||||
assert_relative_eq!(0.0f32, 1e-40f32, epsilon = 1e-40f32);
|
||||
assert_relative_eq!(1e-40f32, 0.0f32, epsilon = 1e-40f32);
|
||||
assert_relative_eq!(0.0f32, -1e-40f32, epsilon = 1e-40f32);
|
||||
assert_relative_eq!(-1e-40f32, 0.0f32, epsilon = 1e-40f32);
|
||||
|
||||
assert_relative_ne!(1e-40f32, 0.0f32, epsilon = 1e-41f32);
|
||||
assert_relative_ne!(0.0f32, 1e-40f32, epsilon = 1e-41f32);
|
||||
assert_relative_ne!(-1e-40f32, 0.0f32, epsilon = 1e-41f32);
|
||||
assert_relative_ne!(0.0f32, -1e-40f32, epsilon = 1e-41f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_max() {
|
||||
assert_relative_eq!(f32::MAX, f32::MAX);
|
||||
assert_relative_ne!(f32::MAX, -f32::MAX);
|
||||
assert_relative_ne!(-f32::MAX, f32::MAX);
|
||||
assert_relative_ne!(f32::MAX, f32::MAX / 2.0);
|
||||
assert_relative_ne!(f32::MAX, -f32::MAX / 2.0);
|
||||
assert_relative_ne!(-f32::MAX, f32::MAX / 2.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_infinity() {
|
||||
assert_relative_eq!(f32::INFINITY, f32::INFINITY);
|
||||
assert_relative_eq!(f32::NEG_INFINITY, f32::NEG_INFINITY);
|
||||
assert_relative_ne!(f32::NEG_INFINITY, f32::INFINITY);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_zero_infinity() {
|
||||
assert_relative_ne!(0f32, f32::INFINITY);
|
||||
assert_relative_ne!(0f32, f32::NEG_INFINITY);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_nan() {
|
||||
assert_relative_ne!(f32::NAN, f32::NAN);
|
||||
|
||||
assert_relative_ne!(f32::NAN, 0.0);
|
||||
assert_relative_ne!(-0.0, f32::NAN);
|
||||
assert_relative_ne!(f32::NAN, -0.0);
|
||||
assert_relative_ne!(0.0, f32::NAN);
|
||||
|
||||
assert_relative_ne!(f32::NAN, f32::INFINITY);
|
||||
assert_relative_ne!(f32::INFINITY, f32::NAN);
|
||||
assert_relative_ne!(f32::NAN, f32::NEG_INFINITY);
|
||||
assert_relative_ne!(f32::NEG_INFINITY, f32::NAN);
|
||||
|
||||
assert_relative_ne!(f32::NAN, f32::MAX);
|
||||
assert_relative_ne!(f32::MAX, f32::NAN);
|
||||
assert_relative_ne!(f32::NAN, -f32::MAX);
|
||||
assert_relative_ne!(-f32::MAX, f32::NAN);
|
||||
|
||||
assert_relative_ne!(f32::NAN, f32::MIN_POSITIVE);
|
||||
assert_relative_ne!(f32::MIN_POSITIVE, f32::NAN);
|
||||
assert_relative_ne!(f32::NAN, -f32::MIN_POSITIVE);
|
||||
assert_relative_ne!(-f32::MIN_POSITIVE, f32::NAN);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_opposite_signs() {
|
||||
assert_relative_ne!(1.000000001f32, -1.0f32);
|
||||
assert_relative_ne!(-1.0f32, 1.000000001f32);
|
||||
assert_relative_ne!(-1.000000001f32, 1.0f32);
|
||||
assert_relative_ne!(1.0f32, -1.000000001f32);
|
||||
|
||||
assert_relative_eq!(10.0 * f32::MIN_POSITIVE, 10.0 * -f32::MIN_POSITIVE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_close_to_zero() {
|
||||
assert_relative_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE);
|
||||
assert_relative_eq!(f32::MIN_POSITIVE, -f32::MIN_POSITIVE);
|
||||
assert_relative_eq!(-f32::MIN_POSITIVE, f32::MIN_POSITIVE);
|
||||
|
||||
assert_relative_eq!(f32::MIN_POSITIVE, 0.0f32);
|
||||
assert_relative_eq!(0.0f32, f32::MIN_POSITIVE);
|
||||
assert_relative_eq!(-f32::MIN_POSITIVE, 0.0f32);
|
||||
assert_relative_eq!(0.0f32, -f32::MIN_POSITIVE);
|
||||
|
||||
assert_relative_ne!(0.000001f32, -f32::MIN_POSITIVE);
|
||||
assert_relative_ne!(0.000001f32, f32::MIN_POSITIVE);
|
||||
assert_relative_ne!(f32::MIN_POSITIVE, 0.000001f32);
|
||||
assert_relative_ne!(-f32::MIN_POSITIVE, 0.000001f32);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_f64 {
|
||||
use std::f64;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_relative_eq!(1.0f64, 1.0f64);
|
||||
assert_relative_ne!(1.0f64, 2.0f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_eq() {
|
||||
assert_relative_eq!(1.0f64, 2.0f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_ne() {
|
||||
assert_relative_ne!(1.0f64, 1.0f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_big() {
|
||||
assert_relative_eq!(10000000000000000.0f64, 10000000000000001.0f64);
|
||||
assert_relative_eq!(10000000000000001.0f64, 10000000000000000.0f64);
|
||||
assert_relative_ne!(1000000000000000.0f64, 1000000000000001.0f64);
|
||||
assert_relative_ne!(1000000000000001.0f64, 1000000000000000.0f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_big_neg() {
|
||||
assert_relative_eq!(-10000000000000000.0f64, -10000000000000001.0f64);
|
||||
assert_relative_eq!(-10000000000000001.0f64, -10000000000000000.0f64);
|
||||
assert_relative_ne!(-1000000000000000.0f64, -1000000000000001.0f64);
|
||||
assert_relative_ne!(-1000000000000001.0f64, -1000000000000000.0f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mid() {
|
||||
assert_relative_eq!(1.0000000000000001f64, 1.0000000000000002f64);
|
||||
assert_relative_eq!(1.0000000000000002f64, 1.0000000000000001f64);
|
||||
assert_relative_ne!(1.000000000000001f64, 1.000000000000002f64);
|
||||
assert_relative_ne!(1.000000000000002f64, 1.000000000000001f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mid_neg() {
|
||||
assert_relative_eq!(-1.0000000000000001f64, -1.0000000000000002f64);
|
||||
assert_relative_eq!(-1.0000000000000002f64, -1.0000000000000001f64);
|
||||
assert_relative_ne!(-1.000000000000001f64, -1.000000000000002f64);
|
||||
assert_relative_ne!(-1.000000000000002f64, -1.000000000000001f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_small() {
|
||||
assert_relative_eq!(0.0000000100000001f64, 0.0000000100000002f64);
|
||||
assert_relative_eq!(0.0000000100000002f64, 0.0000000100000001f64);
|
||||
assert_relative_ne!(0.0000000100000001f64, 0.0000000010000002f64);
|
||||
assert_relative_ne!(0.0000000100000002f64, 0.0000000010000001f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_small_neg() {
|
||||
assert_relative_eq!(-0.0000000100000001f64, -0.0000000100000002f64);
|
||||
assert_relative_eq!(-0.0000000100000002f64, -0.0000000100000001f64);
|
||||
assert_relative_ne!(-0.0000000100000001f64, -0.0000000010000002f64);
|
||||
assert_relative_ne!(-0.0000000100000002f64, -0.0000000010000001f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_zero() {
|
||||
assert_relative_eq!(0.0f64, 0.0f64);
|
||||
assert_relative_eq!(0.0f64, -0.0f64);
|
||||
assert_relative_eq!(-0.0f64, -0.0f64);
|
||||
|
||||
assert_relative_ne!(0.000000000000001f64, 0.0f64);
|
||||
assert_relative_ne!(0.0f64, 0.000000000000001f64);
|
||||
assert_relative_ne!(-0.000000000000001f64, 0.0f64);
|
||||
assert_relative_ne!(0.0f64, -0.000000000000001f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_epsilon() {
|
||||
assert_relative_eq!(0.0f64, 1e-40f64, epsilon = 1e-40f64);
|
||||
assert_relative_eq!(1e-40f64, 0.0f64, epsilon = 1e-40f64);
|
||||
assert_relative_eq!(0.0f64, -1e-40f64, epsilon = 1e-40f64);
|
||||
assert_relative_eq!(-1e-40f64, 0.0f64, epsilon = 1e-40f64);
|
||||
|
||||
assert_relative_ne!(1e-40f64, 0.0f64, epsilon = 1e-41f64);
|
||||
assert_relative_ne!(0.0f64, 1e-40f64, epsilon = 1e-41f64);
|
||||
assert_relative_ne!(-1e-40f64, 0.0f64, epsilon = 1e-41f64);
|
||||
assert_relative_ne!(0.0f64, -1e-40f64, epsilon = 1e-41f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_max() {
|
||||
assert_relative_eq!(f64::MAX, f64::MAX);
|
||||
assert_relative_ne!(f64::MAX, -f64::MAX);
|
||||
assert_relative_ne!(-f64::MAX, f64::MAX);
|
||||
assert_relative_ne!(f64::MAX, f64::MAX / 2.0);
|
||||
assert_relative_ne!(f64::MAX, -f64::MAX / 2.0);
|
||||
assert_relative_ne!(-f64::MAX, f64::MAX / 2.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_infinity() {
|
||||
assert_relative_eq!(f64::INFINITY, f64::INFINITY);
|
||||
assert_relative_eq!(f64::NEG_INFINITY, f64::NEG_INFINITY);
|
||||
assert_relative_ne!(f64::NEG_INFINITY, f64::INFINITY);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_nan() {
|
||||
assert_relative_ne!(f64::NAN, f64::NAN);
|
||||
|
||||
assert_relative_ne!(f64::NAN, 0.0);
|
||||
assert_relative_ne!(-0.0, f64::NAN);
|
||||
assert_relative_ne!(f64::NAN, -0.0);
|
||||
assert_relative_ne!(0.0, f64::NAN);
|
||||
|
||||
assert_relative_ne!(f64::NAN, f64::INFINITY);
|
||||
assert_relative_ne!(f64::INFINITY, f64::NAN);
|
||||
assert_relative_ne!(f64::NAN, f64::NEG_INFINITY);
|
||||
assert_relative_ne!(f64::NEG_INFINITY, f64::NAN);
|
||||
|
||||
assert_relative_ne!(f64::NAN, f64::MAX);
|
||||
assert_relative_ne!(f64::MAX, f64::NAN);
|
||||
assert_relative_ne!(f64::NAN, -f64::MAX);
|
||||
assert_relative_ne!(-f64::MAX, f64::NAN);
|
||||
|
||||
assert_relative_ne!(f64::NAN, f64::MIN_POSITIVE);
|
||||
assert_relative_ne!(f64::MIN_POSITIVE, f64::NAN);
|
||||
assert_relative_ne!(f64::NAN, -f64::MIN_POSITIVE);
|
||||
assert_relative_ne!(-f64::MIN_POSITIVE, f64::NAN);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_opposite_signs() {
|
||||
assert_relative_ne!(1.000000001f64, -1.0f64);
|
||||
assert_relative_ne!(-1.0f64, 1.000000001f64);
|
||||
assert_relative_ne!(-1.000000001f64, 1.0f64);
|
||||
assert_relative_ne!(1.0f64, -1.000000001f64);
|
||||
|
||||
assert_relative_eq!(10.0 * f64::MIN_POSITIVE, 10.0 * -f64::MIN_POSITIVE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_close_to_zero() {
|
||||
assert_relative_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE);
|
||||
assert_relative_eq!(f64::MIN_POSITIVE, -f64::MIN_POSITIVE);
|
||||
assert_relative_eq!(-f64::MIN_POSITIVE, f64::MIN_POSITIVE);
|
||||
|
||||
assert_relative_eq!(f64::MIN_POSITIVE, 0.0f64);
|
||||
assert_relative_eq!(0.0f64, f64::MIN_POSITIVE);
|
||||
assert_relative_eq!(-f64::MIN_POSITIVE, 0.0f64);
|
||||
assert_relative_eq!(0.0f64, -f64::MIN_POSITIVE);
|
||||
|
||||
assert_relative_ne!(0.000000000000001f64, -f64::MIN_POSITIVE);
|
||||
assert_relative_ne!(0.000000000000001f64, f64::MIN_POSITIVE);
|
||||
assert_relative_ne!(f64::MIN_POSITIVE, 0.000000000000001f64);
|
||||
assert_relative_ne!(-f64::MIN_POSITIVE, 0.000000000000001f64);
|
||||
}
|
||||
}
|
||||
|
||||
mod test_ref {
|
||||
mod test_f32 {
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_relative_eq!(&1.0f32, &1.0f32);
|
||||
assert_relative_ne!(&1.0f32, &2.0f32);
|
||||
}
|
||||
}
|
||||
|
||||
mod test_f64 {
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_relative_eq!(&1.0f64, &1.0f64);
|
||||
assert_relative_ne!(&1.0f64, &2.0f64);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod test_slice {
|
||||
mod test_f32 {
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_relative_eq!([1.0f32, 2.0f32][..], [1.0f32, 2.0f32][..]);
|
||||
assert_relative_ne!([1.0f32, 2.0f32][..], [2.0f32, 1.0f32][..]);
|
||||
}
|
||||
}
|
||||
|
||||
mod test_f64 {
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_relative_eq!([1.0f64, 2.0f64][..], [1.0f64, 2.0f64][..]);
|
||||
assert_relative_ne!([1.0f64, 2.0f64][..], [2.0f64, 1.0f64][..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "num-complex")]
|
||||
mod test_complex {
|
||||
extern crate num_complex;
|
||||
pub use self::num_complex::Complex;
|
||||
|
||||
mod test_f32 {
|
||||
use super::Complex;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_relative_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
|
||||
assert_relative_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_eq() {
|
||||
assert_relative_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_ne() {
|
||||
assert_relative_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
|
||||
}
|
||||
}
|
||||
|
||||
mod test_f64 {
|
||||
use super::Complex;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_relative_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
|
||||
assert_relative_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_eq() {
|
||||
assert_relative_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_ne() {
|
||||
assert_relative_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,438 @@
|
|||
// Copyright 2015 Brendan Zabarauskas
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Test cases derived from:
|
||||
// https://github.com/Pybonacci/puntoflotante.org/blob/master/content/errors/NearlyEqualsTest.java
|
||||
|
||||
#[macro_use]
|
||||
extern crate approx;
|
||||
|
||||
mod test_f32 {
|
||||
use std::f32;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_ulps_eq!(1.0f32, 1.0f32);
|
||||
assert_ulps_ne!(1.0f32, 2.0f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_eq() {
|
||||
assert_ulps_eq!(1.0f32, 2.0f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_ne() {
|
||||
assert_ulps_ne!(1.0f32, 1.0f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_big() {
|
||||
assert_ulps_eq!(100000000.0f32, 100000001.0f32);
|
||||
assert_ulps_eq!(100000001.0f32, 100000000.0f32);
|
||||
assert_ulps_ne!(10000.0f32, 10001.0f32);
|
||||
assert_ulps_ne!(10001.0f32, 10000.0f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_big_neg() {
|
||||
assert_ulps_eq!(-100000000.0f32, -100000001.0f32);
|
||||
assert_ulps_eq!(-100000001.0f32, -100000000.0f32);
|
||||
assert_ulps_ne!(-10000.0f32, -10001.0f32);
|
||||
assert_ulps_ne!(-10001.0f32, -10000.0f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mid() {
|
||||
assert_ulps_eq!(1.0000001f32, 1.0000002f32);
|
||||
assert_ulps_eq!(1.0000002f32, 1.0000001f32);
|
||||
assert_ulps_ne!(1.000001f32, 1.000002f32);
|
||||
assert_ulps_ne!(1.000002f32, 1.000001f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mid_neg() {
|
||||
assert_ulps_eq!(-1.0000001f32, -1.0000002f32);
|
||||
assert_ulps_eq!(-1.0000002f32, -1.0000001f32);
|
||||
assert_ulps_ne!(-1.000001f32, -1.000002f32);
|
||||
assert_ulps_ne!(-1.000002f32, -1.000001f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_small() {
|
||||
assert_ulps_eq!(0.000010001f32, 0.000010002f32);
|
||||
assert_ulps_eq!(0.000010002f32, 0.000010001f32);
|
||||
assert_ulps_ne!(0.000001002f32, 0.0000001001f32);
|
||||
assert_ulps_ne!(0.000001001f32, 0.0000001002f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_small_neg() {
|
||||
assert_ulps_eq!(-0.000010001f32, -0.000010002f32);
|
||||
assert_ulps_eq!(-0.000010002f32, -0.000010001f32);
|
||||
assert_ulps_ne!(-0.000001002f32, -0.0000001001f32);
|
||||
assert_ulps_ne!(-0.000001001f32, -0.0000001002f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_zero() {
|
||||
assert_ulps_eq!(0.0f32, 0.0f32);
|
||||
assert_ulps_eq!(0.0f32, -0.0f32);
|
||||
assert_ulps_eq!(-0.0f32, -0.0f32);
|
||||
|
||||
assert_ulps_ne!(0.000001f32, 0.0f32);
|
||||
assert_ulps_ne!(0.0f32, 0.000001f32);
|
||||
assert_ulps_ne!(-0.000001f32, 0.0f32);
|
||||
assert_ulps_ne!(0.0f32, -0.000001f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_epsilon() {
|
||||
assert_ulps_eq!(0.0f32, 1e-40f32, epsilon = 1e-40f32);
|
||||
assert_ulps_eq!(1e-40f32, 0.0f32, epsilon = 1e-40f32);
|
||||
assert_ulps_eq!(0.0f32, -1e-40f32, epsilon = 1e-40f32);
|
||||
assert_ulps_eq!(-1e-40f32, 0.0f32, epsilon = 1e-40f32);
|
||||
|
||||
assert_ulps_ne!(1e-40f32, 0.0f32, epsilon = 1e-41f32);
|
||||
assert_ulps_ne!(0.0f32, 1e-40f32, epsilon = 1e-41f32);
|
||||
assert_ulps_ne!(-1e-40f32, 0.0f32, epsilon = 1e-41f32);
|
||||
assert_ulps_ne!(0.0f32, -1e-40f32, epsilon = 1e-41f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_max() {
|
||||
assert_ulps_eq!(f32::MAX, f32::MAX);
|
||||
assert_ulps_ne!(f32::MAX, -f32::MAX);
|
||||
assert_ulps_ne!(-f32::MAX, f32::MAX);
|
||||
assert_ulps_ne!(f32::MAX, f32::MAX / 2.0);
|
||||
assert_ulps_ne!(f32::MAX, -f32::MAX / 2.0);
|
||||
assert_ulps_ne!(-f32::MAX, f32::MAX / 2.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_infinity() {
|
||||
assert_ulps_eq!(f32::INFINITY, f32::INFINITY);
|
||||
assert_ulps_eq!(f32::NEG_INFINITY, f32::NEG_INFINITY);
|
||||
assert_ulps_ne!(f32::NEG_INFINITY, f32::INFINITY);
|
||||
assert_ulps_eq!(f32::INFINITY, f32::MAX);
|
||||
assert_ulps_eq!(f32::NEG_INFINITY, -f32::MAX);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_nan() {
|
||||
assert_ulps_ne!(f32::NAN, f32::NAN);
|
||||
|
||||
assert_ulps_ne!(f32::NAN, 0.0);
|
||||
assert_ulps_ne!(-0.0, f32::NAN);
|
||||
assert_ulps_ne!(f32::NAN, -0.0);
|
||||
assert_ulps_ne!(0.0, f32::NAN);
|
||||
|
||||
assert_ulps_ne!(f32::NAN, f32::INFINITY);
|
||||
assert_ulps_ne!(f32::INFINITY, f32::NAN);
|
||||
assert_ulps_ne!(f32::NAN, f32::NEG_INFINITY);
|
||||
assert_ulps_ne!(f32::NEG_INFINITY, f32::NAN);
|
||||
|
||||
assert_ulps_ne!(f32::NAN, f32::MAX);
|
||||
assert_ulps_ne!(f32::MAX, f32::NAN);
|
||||
assert_ulps_ne!(f32::NAN, -f32::MAX);
|
||||
assert_ulps_ne!(-f32::MAX, f32::NAN);
|
||||
|
||||
assert_ulps_ne!(f32::NAN, f32::MIN_POSITIVE);
|
||||
assert_ulps_ne!(f32::MIN_POSITIVE, f32::NAN);
|
||||
assert_ulps_ne!(f32::NAN, -f32::MIN_POSITIVE);
|
||||
assert_ulps_ne!(-f32::MIN_POSITIVE, f32::NAN);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_opposite_signs() {
|
||||
assert_ulps_ne!(1.000000001f32, -1.0f32);
|
||||
assert_ulps_ne!(-1.0f32, 1.000000001f32);
|
||||
assert_ulps_ne!(-1.000000001f32, 1.0f32);
|
||||
assert_ulps_ne!(1.0f32, -1.000000001f32);
|
||||
|
||||
assert_ulps_eq!(10.0 * f32::MIN_POSITIVE, 10.0 * -f32::MIN_POSITIVE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_close_to_zero() {
|
||||
assert_ulps_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE);
|
||||
assert_ulps_eq!(f32::MIN_POSITIVE, -f32::MIN_POSITIVE);
|
||||
assert_ulps_eq!(-f32::MIN_POSITIVE, f32::MIN_POSITIVE);
|
||||
|
||||
assert_ulps_eq!(f32::MIN_POSITIVE, 0.0f32);
|
||||
assert_ulps_eq!(0.0f32, f32::MIN_POSITIVE);
|
||||
assert_ulps_eq!(-f32::MIN_POSITIVE, 0.0f32);
|
||||
assert_ulps_eq!(0.0f32, -f32::MIN_POSITIVE);
|
||||
|
||||
assert_ulps_ne!(0.000001f32, -f32::MIN_POSITIVE);
|
||||
assert_ulps_ne!(0.000001f32, f32::MIN_POSITIVE);
|
||||
assert_ulps_ne!(f32::MIN_POSITIVE, 0.000001f32);
|
||||
assert_ulps_ne!(-f32::MIN_POSITIVE, 0.000001f32);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_f64 {
|
||||
use std::f64;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_ulps_eq!(1.0f64, 1.0f64);
|
||||
assert_ulps_ne!(1.0f64, 2.0f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_eq() {
|
||||
assert_ulps_eq!(1.0f64, 2.0f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_ne() {
|
||||
assert_ulps_ne!(1.0f64, 1.0f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_big() {
|
||||
assert_ulps_eq!(10000000000000000.0f64, 10000000000000001.0f64);
|
||||
assert_ulps_eq!(10000000000000001.0f64, 10000000000000000.0f64);
|
||||
assert_ulps_ne!(1000000000000000.0f64, 1000000000000001.0f64);
|
||||
assert_ulps_ne!(1000000000000001.0f64, 1000000000000000.0f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_big_neg() {
|
||||
assert_ulps_eq!(-10000000000000000.0f64, -10000000000000001.0f64);
|
||||
assert_ulps_eq!(-10000000000000001.0f64, -10000000000000000.0f64);
|
||||
assert_ulps_ne!(-1000000000000000.0f64, -1000000000000001.0f64);
|
||||
assert_ulps_ne!(-1000000000000001.0f64, -1000000000000000.0f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mid() {
|
||||
assert_ulps_eq!(1.0000000000000001f64, 1.0000000000000002f64);
|
||||
assert_ulps_eq!(1.0000000000000002f64, 1.0000000000000001f64);
|
||||
assert_ulps_ne!(1.000000000000001f64, 1.0000000000000022f64);
|
||||
assert_ulps_ne!(1.0000000000000022f64, 1.000000000000001f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mid_neg() {
|
||||
assert_ulps_eq!(-1.0000000000000001f64, -1.0000000000000002f64);
|
||||
assert_ulps_eq!(-1.0000000000000002f64, -1.0000000000000001f64);
|
||||
assert_ulps_ne!(-1.000000000000001f64, -1.0000000000000022f64);
|
||||
assert_ulps_ne!(-1.0000000000000022f64, -1.000000000000001f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_small() {
|
||||
assert_ulps_eq!(0.0000000100000001f64, 0.0000000100000002f64);
|
||||
assert_ulps_eq!(0.0000000100000002f64, 0.0000000100000001f64);
|
||||
assert_ulps_ne!(0.0000000100000001f64, 0.0000000010000002f64);
|
||||
assert_ulps_ne!(0.0000000100000002f64, 0.0000000010000001f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_small_neg() {
|
||||
assert_ulps_eq!(-0.0000000100000001f64, -0.0000000100000002f64);
|
||||
assert_ulps_eq!(-0.0000000100000002f64, -0.0000000100000001f64);
|
||||
assert_ulps_ne!(-0.0000000100000001f64, -0.0000000010000002f64);
|
||||
assert_ulps_ne!(-0.0000000100000002f64, -0.0000000010000001f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_zero() {
|
||||
assert_ulps_eq!(0.0f64, 0.0f64);
|
||||
assert_ulps_eq!(0.0f64, -0.0f64);
|
||||
assert_ulps_eq!(-0.0f64, -0.0f64);
|
||||
|
||||
assert_ulps_ne!(0.000000000000001f64, 0.0f64);
|
||||
assert_ulps_ne!(0.0f64, 0.000000000000001f64);
|
||||
assert_ulps_ne!(-0.000000000000001f64, 0.0f64);
|
||||
assert_ulps_ne!(0.0f64, -0.000000000000001f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_epsilon() {
|
||||
assert_ulps_eq!(0.0f64, 1e-40f64, epsilon = 1e-40f64);
|
||||
assert_ulps_eq!(1e-40f64, 0.0f64, epsilon = 1e-40f64);
|
||||
assert_ulps_eq!(0.0f64, -1e-40f64, epsilon = 1e-40f64);
|
||||
assert_ulps_eq!(-1e-40f64, 0.0f64, epsilon = 1e-40f64);
|
||||
|
||||
assert_ulps_ne!(1e-40f64, 0.0f64, epsilon = 1e-41f64);
|
||||
assert_ulps_ne!(0.0f64, 1e-40f64, epsilon = 1e-41f64);
|
||||
assert_ulps_ne!(-1e-40f64, 0.0f64, epsilon = 1e-41f64);
|
||||
assert_ulps_ne!(0.0f64, -1e-40f64, epsilon = 1e-41f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_max() {
|
||||
assert_ulps_eq!(f64::MAX, f64::MAX);
|
||||
assert_ulps_ne!(f64::MAX, -f64::MAX);
|
||||
assert_ulps_ne!(-f64::MAX, f64::MAX);
|
||||
assert_ulps_ne!(f64::MAX, f64::MAX / 2.0);
|
||||
assert_ulps_ne!(f64::MAX, -f64::MAX / 2.0);
|
||||
assert_ulps_ne!(-f64::MAX, f64::MAX / 2.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_infinity() {
|
||||
assert_ulps_eq!(f64::INFINITY, f64::INFINITY);
|
||||
assert_ulps_eq!(f64::NEG_INFINITY, f64::NEG_INFINITY);
|
||||
assert_ulps_ne!(f64::NEG_INFINITY, f64::INFINITY);
|
||||
assert_ulps_eq!(f64::INFINITY, f64::MAX);
|
||||
assert_ulps_eq!(f64::NEG_INFINITY, -f64::MAX);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_nan() {
|
||||
assert_ulps_ne!(f64::NAN, f64::NAN);
|
||||
|
||||
assert_ulps_ne!(f64::NAN, 0.0);
|
||||
assert_ulps_ne!(-0.0, f64::NAN);
|
||||
assert_ulps_ne!(f64::NAN, -0.0);
|
||||
assert_ulps_ne!(0.0, f64::NAN);
|
||||
|
||||
assert_ulps_ne!(f64::NAN, f64::INFINITY);
|
||||
assert_ulps_ne!(f64::INFINITY, f64::NAN);
|
||||
assert_ulps_ne!(f64::NAN, f64::NEG_INFINITY);
|
||||
assert_ulps_ne!(f64::NEG_INFINITY, f64::NAN);
|
||||
|
||||
assert_ulps_ne!(f64::NAN, f64::MAX);
|
||||
assert_ulps_ne!(f64::MAX, f64::NAN);
|
||||
assert_ulps_ne!(f64::NAN, -f64::MAX);
|
||||
assert_ulps_ne!(-f64::MAX, f64::NAN);
|
||||
|
||||
assert_ulps_ne!(f64::NAN, f64::MIN_POSITIVE);
|
||||
assert_ulps_ne!(f64::MIN_POSITIVE, f64::NAN);
|
||||
assert_ulps_ne!(f64::NAN, -f64::MIN_POSITIVE);
|
||||
assert_ulps_ne!(-f64::MIN_POSITIVE, f64::NAN);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_opposite_signs() {
|
||||
assert_ulps_ne!(1.000000001f64, -1.0f64);
|
||||
assert_ulps_ne!(-1.0f64, 1.000000001f64);
|
||||
assert_ulps_ne!(-1.000000001f64, 1.0f64);
|
||||
assert_ulps_ne!(1.0f64, -1.000000001f64);
|
||||
|
||||
assert_ulps_eq!(10.0 * f64::MIN_POSITIVE, 10.0 * -f64::MIN_POSITIVE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_close_to_zero() {
|
||||
assert_ulps_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE);
|
||||
assert_ulps_eq!(f64::MIN_POSITIVE, -f64::MIN_POSITIVE);
|
||||
assert_ulps_eq!(-f64::MIN_POSITIVE, f64::MIN_POSITIVE);
|
||||
|
||||
assert_ulps_eq!(f64::MIN_POSITIVE, 0.0f64);
|
||||
assert_ulps_eq!(0.0f64, f64::MIN_POSITIVE);
|
||||
assert_ulps_eq!(-f64::MIN_POSITIVE, 0.0f64);
|
||||
assert_ulps_eq!(0.0f64, -f64::MIN_POSITIVE);
|
||||
|
||||
assert_ulps_ne!(0.000000000000001f64, -f64::MIN_POSITIVE);
|
||||
assert_ulps_ne!(0.000000000000001f64, f64::MIN_POSITIVE);
|
||||
assert_ulps_ne!(f64::MIN_POSITIVE, 0.000000000000001f64);
|
||||
assert_ulps_ne!(-f64::MIN_POSITIVE, 0.000000000000001f64);
|
||||
}
|
||||
}
|
||||
|
||||
mod test_ref {
|
||||
mod test_f32 {
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_ulps_eq!(&1.0f32, &1.0f32);
|
||||
assert_ulps_ne!(&1.0f32, &2.0f32);
|
||||
}
|
||||
}
|
||||
|
||||
mod test_f64 {
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_ulps_eq!(&1.0f64, &1.0f64);
|
||||
assert_ulps_ne!(&1.0f64, &2.0f64);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod test_slice {
|
||||
mod test_f32 {
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_ulps_eq!([1.0f32, 2.0f32][..], [1.0f32, 2.0f32][..]);
|
||||
assert_ulps_ne!([1.0f32, 2.0f32][..], [2.0f32, 1.0f32][..]);
|
||||
}
|
||||
}
|
||||
|
||||
mod test_f64 {
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_ulps_eq!([1.0f64, 2.0f64][..], [1.0f64, 2.0f64][..]);
|
||||
assert_ulps_ne!([1.0f64, 2.0f64][..], [2.0f64, 1.0f64][..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "num-complex")]
|
||||
mod test_complex {
|
||||
extern crate num_complex;
|
||||
pub use self::num_complex::Complex;
|
||||
|
||||
mod test_f32 {
|
||||
use super::Complex;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_ulps_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
|
||||
assert_ulps_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_eq() {
|
||||
assert_ulps_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_ne() {
|
||||
assert_ulps_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
|
||||
}
|
||||
}
|
||||
|
||||
mod test_f64 {
|
||||
use super::Complex;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
assert_ulps_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
|
||||
assert_ulps_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_eq() {
|
||||
assert_ulps_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_basic_panic_ne() {
|
||||
assert_ulps_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
{"files":{"Cargo.toml":"adb58f911fb48a1a3828bda514c2da5ce5650e8e98eb7217f014b1720fe11d38","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0245ee104228a100ce5fceecf43e25faae450494d9173f43fd94c27d69fdac13","README.rst":"a3cff166a7b622233cd9ccfbe472f910267629f4b41e387ad8573fc06eac37d6","benches/arraystring.rs":"f12b890977117ebde4ca42bcd6b91f2a6a087f2b235aaca6d15e30d125ae9f67","benches/extend.rs":"8c8f78df7e90b62c7e160cf5ea6c61b90bc4035a9704b6a179a1e01d8fafe2e9","build.rs":"fc29930f06cb4dde58f43d2f30b28c366ca3bafcd7e44b41a1c250d60fa900fb","custom.css":"e6f2cd299392337b4e2959c52f422e5b7be11920ea98d10db44d10ddef5ed47c","src/array.rs":"67fb063ee515bfd4968ede219dff81091a5935ef93529ebd1bb2a716ea3ed3d3","src/array_string.rs":"8a1a4cfc1699e2373815e57dc676a87a30629f91a9e861c866ccc6cb1381eadf","src/char.rs":"64a08f6a743b67bf2c96483f91c2fdaea79f6e91df5cd752f770b16a6b1d5b1e","src/errors.rs":"dde99bffaddfd45396aab7e07642cc018ef5435fe60c4f26a2c05a36555be18c","src/lib.rs":"34167f35d9a5b887e6fb424500bb64764d68d029d0e374827886b05ad4d26bca","src/maybe_uninit.rs":"7cca39ffe0f122716baaa174b433ff5fe9c93560f8e54fc077a0083500eaa1dd","src/maybe_uninit_nodrop.rs":"7fb2e24bf815dd6e1d104056fa9be4a11de7e0f0e5474742af186c580a6b47cc","src/maybe_uninit_stable.rs":"3f7daba622cf5df86992b451b46636a491c9611292f59969eb6890a10a00476d","src/range.rs":"65744ab7def208a1ab155ea2448fe9ea7fc14f33211361b1041f540125b32efd","tests/serde.rs":"ef3986a82656b09f3fbb14358e767051ffabe09592c61e69ea695cb88760e8ba","tests/tests.rs":"8066a4aca7b40356525ed87f7658773e610ef4fce3522b0cc0f301384d880f00"},"package":"cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"}
|
|
@ -0,0 +1,48 @@
|
|||
"""
|
||||
cargo-raze crate build file.
|
||||
|
||||
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||
"""
|
||||
package(default_visibility = [
|
||||
# Public for visibility by "@raze__crate__version//" targets.
|
||||
#
|
||||
# Prefer access through "//third_party/cargo", which limits external
|
||||
# visibility to explicit Cargo.toml dependencies.
|
||||
"//visibility:public",
|
||||
])
|
||||
|
||||
licenses([
|
||||
"notice", # "MIT,Apache-2.0"
|
||||
])
|
||||
|
||||
load(
|
||||
"@io_bazel_rules_rust//rust:rust.bzl",
|
||||
"rust_library",
|
||||
"rust_binary",
|
||||
"rust_test",
|
||||
)
|
||||
|
||||
|
||||
# Unsupported target "arraystring" with type "bench" omitted
|
||||
|
||||
rust_library(
|
||||
name = "arrayvec",
|
||||
crate_root = "src/lib.rs",
|
||||
crate_type = "lib",
|
||||
edition = "2015",
|
||||
srcs = glob(["**/*.rs"]),
|
||||
deps = [
|
||||
"//third_party/cargo/vendor/nodrop-0.1.14:nodrop",
|
||||
],
|
||||
rustc_flags = [
|
||||
"--cap-lints=allow",
|
||||
],
|
||||
version = "0.4.12",
|
||||
crate_features = [
|
||||
],
|
||||
)
|
||||
|
||||
# Unsupported target "build-script-build" with type "custom-build" omitted
|
||||
# Unsupported target "extend" with type "bench" omitted
|
||||
# Unsupported target "serde" with type "test" omitted
|
||||
# Unsupported target "tests" with type "test" omitted
|
|
@ -0,0 +1,62 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
name = "arrayvec"
|
||||
version = "0.4.12"
|
||||
authors = ["bluss"]
|
||||
description = "A vector with fixed capacity, backed by an array (it can be stored on the stack too). Implements fixed capacity ArrayVec and ArrayString."
|
||||
documentation = "https://docs.rs/arrayvec/"
|
||||
keywords = ["stack", "vector", "array", "data-structure", "no_std"]
|
||||
categories = ["data-structures", "no-std"]
|
||||
license = "MIT/Apache-2.0"
|
||||
repository = "https://github.com/bluss/arrayvec"
|
||||
[package.metadata.docs.rs]
|
||||
features = ["serde-1"]
|
||||
|
||||
[package.metadata.release]
|
||||
no-dev-version = true
|
||||
tag-name = "{{version}}"
|
||||
|
||||
[[bench]]
|
||||
name = "extend"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "arraystring"
|
||||
harness = false
|
||||
[dependencies.nodrop]
|
||||
version = "0.1.12"
|
||||
default-features = false
|
||||
|
||||
[dependencies.serde]
|
||||
version = "1.0"
|
||||
optional = true
|
||||
default-features = false
|
||||
[dev-dependencies.bencher]
|
||||
version = "0.1.4"
|
||||
|
||||
[dev-dependencies.matches]
|
||||
version = "0.1"
|
||||
|
||||
[dev-dependencies.serde_test]
|
||||
version = "1.0"
|
||||
|
||||
[build-dependencies]
|
||||
|
||||
[features]
|
||||
array-sizes-129-255 = []
|
||||
array-sizes-33-128 = []
|
||||
default = ["std"]
|
||||
serde-1 = ["serde"]
|
||||
std = []
|
||||
use_union = []
|
|
@ -0,0 +1,201 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) Ulrik Sverdrup "bluss" 2015-2017
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,224 @@
|
|||
|
||||
arrayvec
|
||||
========
|
||||
|
||||
A vector with fixed capacity.
|
||||
|
||||
Please read the `API documentation here`__
|
||||
|
||||
__ https://docs.rs/arrayvec
|
||||
|
||||
|build_status|_ |crates|_ |crates2|_
|
||||
|
||||
.. |build_status| image:: https://travis-ci.org/bluss/arrayvec.svg
|
||||
.. _build_status: https://travis-ci.org/bluss/arrayvec
|
||||
|
||||
.. |crates| image:: http://meritbadge.herokuapp.com/arrayvec
|
||||
.. _crates: https://crates.io/crates/arrayvec
|
||||
|
||||
.. |crates2| image:: http://meritbadge.herokuapp.com/nodrop
|
||||
.. _crates2: https://crates.io/crates/nodrop
|
||||
|
||||
Recent Changes (arrayvec)
|
||||
-------------------------
|
||||
|
||||
- 0.4.12
|
||||
|
||||
- Use raw pointers instead of ``get_unchecked_mut`` where the target may be
|
||||
uninitialized a everywhere relevant in the ArrayVec implementation.
|
||||
|
||||
- 0.4.11
|
||||
|
||||
- In Rust 1.36 or later, use newly stable MaybeUninit. This extends the
|
||||
soundness work introduced in 0.4.9, we are finally able to use this in
|
||||
stable. We use feature detection (build script) to enable this at build
|
||||
time.
|
||||
|
||||
- 0.4.10
|
||||
|
||||
- Use ``repr(C)`` in the ``union`` version that was introduced in 0.4.9, to
|
||||
allay some soundness concerns.
|
||||
|
||||
- 0.4.9
|
||||
|
||||
- Use ``union`` in the implementation on when this is detected to be supported
|
||||
(nightly only for now). This is a better solution for treating uninitialized
|
||||
regions correctly, and we'll use it in stable Rust as soon as we are able.
|
||||
When this is enabled, the ``ArrayVec`` has no space overhead in its memory
|
||||
layout, although the size of the vec should not be relied upon. (See `#114`_)
|
||||
- ``ArrayString`` updated to not use uninitialized memory, it instead zeros its
|
||||
backing array. This will be refined in the next version, since we
|
||||
need to make changes to the user visible API.
|
||||
- The ``use_union`` feature now does nothing (like its documentation foretold).
|
||||
|
||||
.. _`#114`: https://github.com/bluss/arrayvec/pull/114
|
||||
|
||||
- 0.4.8
|
||||
|
||||
- Implement Clone and Debug for ``IntoIter`` by @clarcharr
|
||||
- Add more array sizes under crate features. These cover all in the range
|
||||
up to 128 and 129 to 255 respectively (we have a few of those by default):
|
||||
|
||||
- ``array-size-33-128``
|
||||
- ``array-size-129-255``
|
||||
|
||||
- 0.4.7
|
||||
|
||||
- Fix future compat warning about raw pointer casts
|
||||
- Use ``drop_in_place`` when dropping the arrayvec by-value iterator
|
||||
- Decrease mininum Rust version (see docs) by @jeehoonkang
|
||||
|
||||
- 0.3.25
|
||||
|
||||
- Fix future compat warning about raw pointer casts
|
||||
|
||||
- 0.4.6
|
||||
|
||||
- Fix compilation on 16-bit targets. This means, the 65536 array size is not
|
||||
included on these targets.
|
||||
|
||||
- 0.3.24
|
||||
|
||||
- Fix compilation on 16-bit targets. This means, the 65536 array size is not
|
||||
included on these targets.
|
||||
- Fix license files so that they are both included (was fixed in 0.4 before)
|
||||
|
||||
- 0.4.5
|
||||
|
||||
- Add methods to ``ArrayString`` by @DenialAdams:
|
||||
|
||||
- ``.pop() -> Option<char>``
|
||||
- ``.truncate(new_len)``
|
||||
- ``.remove(index) -> char``
|
||||
|
||||
- Remove dependency on crate odds
|
||||
- Document debug assertions in unsafe methods better
|
||||
|
||||
- 0.4.4
|
||||
|
||||
- Add method ``ArrayVec::truncate()`` by @niklasf
|
||||
|
||||
- 0.4.3
|
||||
|
||||
- Improve performance for ``ArrayVec::extend`` with a lower level
|
||||
implementation (#74)
|
||||
- Small cleanup in dependencies (use no std for crates where we don't need more)
|
||||
|
||||
- 0.4.2
|
||||
|
||||
- Add constructor method ``new`` to ``CapacityError``.
|
||||
|
||||
- 0.4.1
|
||||
|
||||
- Add ``Default`` impl to ``ArrayString`` by @tbu-
|
||||
|
||||
- 0.4.0
|
||||
|
||||
- Reformed signatures and error handling by @bluss and @tbu-:
|
||||
|
||||
- ``ArrayVec``'s ``push, insert, remove, swap_remove`` now match ``Vec``'s
|
||||
corresponding signature and panic on capacity errors where applicable.
|
||||
- Add fallible methods ``try_push, insert`` and checked methods
|
||||
``pop_at, swap_pop``.
|
||||
- Similar changes to ``ArrayString``'s push methods.
|
||||
|
||||
- Use a local version of the ``RangeArgument`` trait
|
||||
- Add array sizes 50, 150, 200 by @daboross
|
||||
- Support serde 1.0 by @daboross
|
||||
- New method ``.push_unchecked()`` by @niklasf
|
||||
- ``ArrayString`` implements ``PartialOrd, Ord`` by @tbu-
|
||||
- Require Rust 1.14
|
||||
- crate feature ``use_generic_array`` was dropped.
|
||||
|
||||
- 0.3.23
|
||||
|
||||
- Implement ``PartialOrd, Ord`` as well as ``PartialOrd<str>`` for
|
||||
``ArrayString``.
|
||||
|
||||
- 0.3.22
|
||||
|
||||
- Implement ``Array`` for the 65536 size
|
||||
|
||||
- 0.3.21
|
||||
|
||||
- Use ``encode_utf8`` from crate odds
|
||||
- Add constructor ``ArrayString::from_byte_string``
|
||||
|
||||
- 0.3.20
|
||||
|
||||
- Simplify and speed up ``ArrayString``’s ``.push(char)``-
|
||||
|
||||
- 0.3.19
|
||||
|
||||
- Add new crate feature ``use_generic_array`` which allows using their
|
||||
``GenericArray`` just like a regular fixed size array for the storage
|
||||
of an ``ArrayVec``.
|
||||
|
||||
- 0.3.18
|
||||
|
||||
- Fix bounds check in ``ArrayVec::insert``!
|
||||
It would be buggy if ``self.len() < index < self.capacity()``. Take note of
|
||||
the push out behavior specified in the docs.
|
||||
|
||||
- 0.3.17
|
||||
|
||||
- Added crate feature ``use_union`` which forwards to the nodrop crate feature
|
||||
- Added methods ``.is_full()`` to ``ArrayVec`` and ``ArrayString``.
|
||||
|
||||
- 0.3.16
|
||||
|
||||
- Added method ``.retain()`` to ``ArrayVec``.
|
||||
- Added methods ``.as_slice(), .as_mut_slice()`` to ``ArrayVec`` and ``.as_str()``
|
||||
to ``ArrayString``.
|
||||
|
||||
- 0.3.15
|
||||
|
||||
- Add feature std, which you can opt out of to use ``no_std`` (requires Rust 1.6
|
||||
to opt out).
|
||||
- Implement ``Clone::clone_from`` for ArrayVec and ArrayString
|
||||
|
||||
- 0.3.14
|
||||
|
||||
- Add ``ArrayString::from(&str)``
|
||||
|
||||
- 0.3.13
|
||||
|
||||
- Added ``DerefMut`` impl for ``ArrayString``.
|
||||
- Added method ``.simplify()`` to drop the element for ``CapacityError``.
|
||||
- Added method ``.dispose()`` to ``ArrayVec``
|
||||
|
||||
- 0.3.12
|
||||
|
||||
- Added ArrayString, a fixed capacity analogy of String
|
||||
|
||||
- 0.3.11
|
||||
|
||||
- Added trait impls Default, PartialOrd, Ord, Write for ArrayVec
|
||||
|
||||
- 0.3.10
|
||||
|
||||
- Go back to using external NoDrop, fixing a panic safety bug (issue #3)
|
||||
|
||||
- 0.3.8
|
||||
|
||||
- Inline the non-dropping logic to remove one drop flag in the
|
||||
ArrayVec representation.
|
||||
|
||||
- 0.3.7
|
||||
|
||||
- Added method .into_inner()
|
||||
- Added unsafe method .set_len()
|
||||
|
||||
|
||||
License
|
||||
=======
|
||||
|
||||
Dual-licensed to be compatible with the Rust project.
|
||||
|
||||
Licensed under the Apache License, Version 2.0
|
||||
http://www.apache.org/licenses/LICENSE-2.0 or the MIT license
|
||||
http://opensource.org/licenses/MIT, at your
|
||||
option. This file may not be copied, modified, or distributed
|
||||
except according to those terms.
|
||||
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
|
||||
extern crate arrayvec;
|
||||
#[macro_use] extern crate bencher;
|
||||
|
||||
use arrayvec::ArrayString;
|
||||
|
||||
use bencher::Bencher;
|
||||
|
||||
fn try_push_c(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
while v.try_push('c').is_ok() {
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn try_push_alpha(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
while v.try_push('α').is_ok() {
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
// Yes, pushing a string char-by-char is slow. Use .push_str.
|
||||
fn try_push_string(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
let input = "abcαβγ“”";
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
for ch in input.chars().cycle() {
|
||||
if !v.try_push(ch).is_ok() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn push_c(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
while !v.is_full() {
|
||||
v.push('c');
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn push_alpha(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
while !v.is_full() {
|
||||
v.push('α');
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn push_string(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
let input = "abcαβγ“”";
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
for ch in input.chars().cycle() {
|
||||
if !v.is_full() {
|
||||
v.push(ch);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
benchmark_group!(benches, try_push_c, try_push_alpha, try_push_string, push_c,
|
||||
push_alpha, push_string);
|
||||
benchmark_main!(benches);
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
extern crate arrayvec;
|
||||
#[macro_use] extern crate bencher;
|
||||
|
||||
use arrayvec::ArrayVec;
|
||||
|
||||
use bencher::Bencher;
|
||||
|
||||
/// Benchmark `ArrayVec::extend` with an iterator yielding a constant value;
/// the length is only known to `extend` through the iterator's size hint.
fn extend_with_constant(b: &mut Bencher) {
    let mut v = ArrayVec::<[u8; 512]>::new();
    let cap = v.capacity();
    b.iter(|| {
        v.clear();
        v.extend((0..cap).map(|_| 1));
        // Read one element back so the work cannot be optimized away.
        v[0]
    });
    // Throughput is reported as capacity bytes per iteration.
    b.bytes = v.capacity() as u64;
}

/// Benchmark `extend` with a range-derived iterator (distinct values, so the
/// fill cannot be turned into a memset).
fn extend_with_range(b: &mut Bencher) {
    let mut v = ArrayVec::<[u8; 512]>::new();
    let cap = v.capacity();
    b.iter(|| {
        v.clear();
        v.extend((0..cap).map(|x| x as _));
        v[0]
    });
    b.bytes = v.capacity() as u64;
}

/// Benchmark `extend` from an existing slice via `iter().cloned()`, the
/// case that can specialize to a bulk copy.
fn extend_with_slice(b: &mut Bencher) {
    let mut v = ArrayVec::<[u8; 512]>::new();
    let data = [1; 512];
    b.iter(|| {
        v.clear();
        v.extend(data.iter().cloned());
        v[0]
    });
    b.bytes = v.capacity() as u64;
}

benchmark_group!(benches, extend_with_constant, extend_with_range, extend_with_slice);
benchmark_main!(benches);
|
|
@ -0,0 +1,90 @@
|
|||
|
||||
use std::env;
|
||||
use std::io::Write;
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
/// Build-script entry point: emits cargo cfg flags describing which
/// `MaybeUninit` strategy the active compiler supports.
fn main() {
    // we need to output *some* file to opt out of the default
    // (re-running the build script whenever any file changes).
    println!("cargo:rerun-if-changed=build.rs");

    detect_maybe_uninit();
}
|
||||
|
||||
/// Probe the compiler (newest feature first) and emit the matching cfg:
/// `has_stable_maybe_uninit` for stable `std::mem::MaybeUninit`, else
/// `has_manually_drop_in_union` + `has_union_feature` for the nightly
/// untagged-union fallback. Emitting nothing selects the oldest fallback.
fn detect_maybe_uninit() {
    let has_stable_maybe_uninit = probe(&stable_maybe_uninit());
    if has_stable_maybe_uninit {
        println!("cargo:rustc-cfg=has_stable_maybe_uninit");
        // Stable support found — no need to probe the unstable path.
        return;
    }
    // NOTE(review): only the `use_feature == true` variant is probed here;
    // `maybe_uninit_code(false)` appears unused — confirm against callers.
    let has_unstable_union_with_md = probe(&maybe_uninit_code(true));
    if has_unstable_union_with_md {
        println!("cargo:rustc-cfg=has_manually_drop_in_union");
        println!("cargo:rustc-cfg=has_union_feature");
    }
}
|
||||
|
||||
// To guard against changes in this currently unstable feature, use
|
||||
// a detection tests instead of a Rustc version and/or date test.
|
||||
// To guard against changes in this currently unstable feature, use
// a detection test instead of a Rustc version and/or date test.
/// Return the source of a probe program that compiles only when
/// `std::mem::MaybeUninit` is available as a stable item.
fn stable_maybe_uninit() -> String {
    String::from("
    #![allow(warnings)]
    use std::mem::MaybeUninit;

    fn main() { }
    ")
}
|
||||
|
||||
// To guard against changes in this currently unstable feature, use
|
||||
// a detection tests instead of a Rustc version and/or date test.
|
||||
// To guard against changes in this currently unstable feature, use
// a detection test instead of a Rustc version and/or date test.
/// Return the source of a probe program exercising a hand-rolled
/// `MaybeUninit` union over `ManuallyDrop`; when `use_feature` is true the
/// `untagged_unions` feature gate is prepended for older nightlies.
fn maybe_uninit_code(use_feature: bool) -> String {
    let feature = if use_feature { "#![feature(untagged_unions)]" } else { "" };

    let code = "
    #![allow(warnings)]
    use std::mem::ManuallyDrop;

    #[derive(Copy)]
    pub union MaybeUninit<T> {
        empty: (),
        value: ManuallyDrop<T>,
    }

    impl<T> Clone for MaybeUninit<T> where T: Copy
    {
        fn clone(&self) -> Self { *self }
    }

    fn main() {
        let value1 = MaybeUninit::<[i32; 3]> { empty: () };
        let value2 = MaybeUninit { value: ManuallyDrop::new([1, 2, 3]) };
    }
    ";

    // Feature gate (possibly empty) followed by the probe body.
    format!("{}{}", feature, code)
}
|
||||
|
||||
/// Test if a code snippet can be compiled.
///
/// Pipes `code` into `$RUSTC` (or plain `rustc`) on stdin, emitting the
/// object file into `$OUT_DIR`, and returns whether compilation succeeded.
/// Panics if rustc cannot be spawned or `OUT_DIR` is unset — both are
/// build-environment bugs, not recoverable conditions.
fn probe(code: &str) -> bool {
    let rustc = env::var_os("RUSTC").unwrap_or_else(|| "rustc".into());
    let out_dir = env::var_os("OUT_DIR").expect("environment variable OUT_DIR");

    let mut child = Command::new(rustc)
        .arg("--out-dir")
        .arg(out_dir)
        // Only emit object code; linking is unnecessary for a probe.
        .arg("--emit=obj")
        // "-" makes rustc read the source from stdin.
        .arg("-")
        .stdin(Stdio::piped())
        .spawn()
        .expect("rustc probe");

    child
        .stdin
        .as_mut()
        .expect("rustc stdin")
        .write_all(code.as_bytes())
        .expect("write rustc stdin");
    // Dropping the stdin handle via wait() closes the pipe so rustc sees EOF.

    child.wait().expect("rustc probe").success()
}
|
|
@ -0,0 +1,25 @@
|
|||
|
||||
/* Tint code in rustdoc output: examples inside doc blocks get a blue
   wash, item declarations (fn/trait/struct/…) a faint green one. */
.docblock pre.rust { background: #eeeeff; }
pre.trait, pre.fn, pre.struct, pre.enum, pre.typedef { background: #fcfefc; }

/* Small “example” label for doc examples */
.docblock pre.rust::before {
    content: "example";
    float: right;
    font-style: italic;
    font-size: 0.8em;
    /* Pull the label up into the block's top-right corner. */
    margin-top: -10px;
    margin-right: -5px;
}


/* Fixup where display in trait listing */
pre.trait .where::before {
    content: '\a  ';
}

/* Inline code spans in doc prose: bold, no box, minimal padding. */
.docblock code {
    background-color: inherit;
    font-weight: bold;
    padding: 0 0.1em;
}
|
|
@ -0,0 +1,137 @@
|
|||
|
||||
/// Trait for fixed size arrays.
///
/// This trait is implemented for some specific array sizes, see
/// the implementor list below. At the current state of Rust we can't
/// make this fully general for every array size.
///
/// The following crate features add more array sizes (and they are not
/// enabled by default due to their impact on compilation speed).
///
/// - `array-sizes-33-128`: All sizes 33 to 128 are implemented
///   (a few in this range are included by default).
/// - `array-sizes-129-255`: All sizes 129 to 255 are implemented
///   (a few in this range are included by default).
///
/// Unsafe because implementors promise that `as_ptr`/`as_mut_ptr` point at
/// `capacity()` contiguous elements.
pub unsafe trait Array {
    /// The array’s element type
    type Item;
    #[doc(hidden)]
    /// The smallest index type that indexes the array.
    type Index: Index;
    #[doc(hidden)]
    fn as_ptr(&self) -> *const Self::Item;
    #[doc(hidden)]
    fn as_mut_ptr(&mut self) -> *mut Self::Item;
    #[doc(hidden)]
    fn capacity() -> usize;
}

/// Lossless-enough conversion between an array's compact index type and
/// `usize`; `from` may truncate, callers keep values within capacity.
pub trait Index : PartialEq + Copy {
    fn to_usize(self) -> usize;
    // 2015-edition anonymous parameter syntax; the argument is the usize
    // value to convert.
    fn from(usize) -> Self;
}

use std::slice::{from_raw_parts};

/// Convenience view of any `Array` as a slice of its full capacity.
pub trait ArrayExt : Array {
    #[inline(always)]
    fn as_slice(&self) -> &[Self::Item] {
        unsafe {
            // Sound per the `Array` contract: `as_ptr` covers `capacity()`
            // contiguous, initialized elements of a real `[T; N]`.
            from_raw_parts(self.as_ptr(), Self::capacity())
        }
    }
}

impl<A> ArrayExt for A where A: Array { }

impl Index for u8 {
    #[inline(always)]
    fn to_usize(self) -> usize { self as usize }
    #[inline(always)]
    fn from(ix: usize) -> Self { ix as u8 }
}

impl Index for u16 {
    #[inline(always)]
    fn to_usize(self) -> usize { self as usize }
    #[inline(always)]
    fn from(ix: usize) -> Self { ix as u16 }
}

impl Index for u32 {
    #[inline(always)]
    fn to_usize(self) -> usize { self as usize }
    #[inline(always)]
    fn from(ix: usize) -> Self { ix as u32 }
}

impl Index for usize {
    #[inline(always)]
    fn to_usize(self) -> usize { self }
    #[inline(always)]
    fn from(ix: usize) -> Self { ix }
}

// Implement `Array` for one concrete `[T; $len]`, choosing $index_type as
// the smallest index type that can address it.
macro_rules! fix_array_impl {
    ($index_type:ty, $len:expr ) => (
        unsafe impl<T> Array for [T; $len] {
            type Item = T;
            type Index = $index_type;
            #[doc(hidden)]
            #[inline(always)]
            fn as_ptr(&self) -> *const T { self as *const _ as *const _ }
            #[doc(hidden)]
            #[inline(always)]
            fn as_mut_ptr(&mut self) -> *mut T { self as *mut _ as *mut _}
            #[doc(hidden)]
            #[inline(always)]
            fn capacity() -> usize { $len }
        }
    )
}

// Expand `fix_array_impl!` for each listed length.
macro_rules! fix_array_impl_recursive {
    ($index_type:ty, ) => ();
    ($index_type:ty, $($len:expr,)*) => (
        $(fix_array_impl!($index_type, $len);)*
    );
}

// Sizes 0..=31 are always available.
fix_array_impl_recursive!(u8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
                          28, 29, 30, 31, );

// Without the feature, only a handful of common sizes in 32..=128.
#[cfg(not(feature="array-sizes-33-128"))]
fix_array_impl_recursive!(u8, 32, 40, 48, 50, 56, 64, 72, 96, 100, 128, );

#[cfg(feature="array-sizes-33-128")]
fix_array_impl_recursive!(u8,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
125, 126, 127, 128,
);

#[cfg(not(feature="array-sizes-129-255"))]
fix_array_impl_recursive!(u8, 160, 192, 200, 224,);

#[cfg(feature="array-sizes-129-255")]
fix_array_impl_recursive!(u8,
129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172,
173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204,
205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236,
237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
253, 254, 255,
);

// Larger sizes need a wider index type (255 no longer fits in u8).
fix_array_impl_recursive!(u16, 256, 384, 512, 768, 1024, 2048, 4096, 8192, 16384, 32768,);
// This array size doesn't exist on 16-bit
#[cfg(any(target_pointer_width="32", target_pointer_width="64"))]
fix_array_impl_recursive!(u32, 1 << 16,);
|
||||
|
|
@ -0,0 +1,516 @@
|
|||
use std::borrow::Borrow;
|
||||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::mem;
|
||||
use std::ptr;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::str;
|
||||
use std::str::Utf8Error;
|
||||
use std::slice;
|
||||
|
||||
use array::{Array, ArrayExt};
|
||||
use array::Index;
|
||||
use CapacityError;
|
||||
use char::encode_utf8;
|
||||
|
||||
#[cfg(feature="serde-1")]
|
||||
use serde::{Serialize, Deserialize, Serializer, Deserializer};
|
||||
|
||||
/// A string with a fixed capacity.
///
/// The `ArrayString` is a string backed by a fixed size array. It keeps track
/// of its length.
///
/// The string is a contiguous value that you can store directly on the stack
/// if needed.
///
/// Invariant: the first `len` bytes of `xs` are always valid UTF-8.
#[derive(Copy)]
pub struct ArrayString<A: Array<Item=u8>> {
    // FIXME: Use Copyable union for xs when we can
    // Backing byte storage; bytes past `len` are unspecified (zeroed at
    // construction).
    xs: A,
    // Current length in bytes, stored in the array's compact index type.
    len: A::Index,
}

impl<A: Array<Item=u8>> Default for ArrayString<A> {
    /// Return an empty `ArrayString`
    fn default() -> ArrayString<A> {
        ArrayString::new()
    }
}
||||
|
||||
impl<A: Array<Item=u8>> ArrayString<A> {
    /// Create a new empty `ArrayString`.
    ///
    /// Capacity is inferred from the type parameter.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut string = ArrayString::<[_; 16]>::new();
    /// string.push_str("foo");
    /// assert_eq!(&string[..], "foo");
    /// assert_eq!(string.capacity(), 16);
    /// ```
    pub fn new() -> ArrayString<A> {
        unsafe {
            ArrayString {
                // FIXME: Use Copyable union for xs when we can
                // Zeroed is fine for u8 storage; len == 0 means no byte is
                // ever read as string content until written.
                xs: mem::zeroed(),
                len: Index::from(0),
            }
        }
    }

    /// Create a new `ArrayString` from a `str`.
    ///
    /// Capacity is inferred from the type parameter.
    ///
    /// **Errors** if the backing array is not large enough to fit the string.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut string = ArrayString::<[_; 3]>::from("foo").unwrap();
    /// assert_eq!(&string[..], "foo");
    /// assert_eq!(string.len(), 3);
    /// assert_eq!(string.capacity(), 3);
    /// ```
    pub fn from(s: &str) -> Result<Self, CapacityError<&str>> {
        let mut arraystr = Self::new();
        arraystr.try_push_str(s)?;
        Ok(arraystr)
    }

    /// Create a new `ArrayString` from a byte string literal.
    ///
    /// **Errors** if the byte string literal is not valid UTF-8.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let string = ArrayString::from_byte_string(b"hello world").unwrap();
    /// ```
    pub fn from_byte_string(b: &A) -> Result<Self, Utf8Error> {
        let mut arraystr = Self::new();
        let s = try!(str::from_utf8(b.as_slice()));
        // The input *is* the backing array type, so it always fits; the
        // debug assert documents that the push cannot fail.
        let _result = arraystr.try_push_str(s);
        debug_assert!(_result.is_ok());
        Ok(arraystr)
    }

    /// Return the capacity of the `ArrayString`.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let string = ArrayString::<[_; 3]>::new();
    /// assert_eq!(string.capacity(), 3);
    /// ```
    #[inline]
    pub fn capacity(&self) -> usize { A::capacity() }

    /// Return if the `ArrayString` is completely filled.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut string = ArrayString::<[_; 1]>::new();
    /// assert!(!string.is_full());
    /// string.push_str("A");
    /// assert!(string.is_full());
    /// ```
    pub fn is_full(&self) -> bool { self.len() == self.capacity() }

    /// Adds the given char to the end of the string.
    ///
    /// ***Panics*** if the backing array is not large enough to fit the additional char.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut string = ArrayString::<[_; 2]>::new();
    ///
    /// string.push('a');
    /// string.push('b');
    ///
    /// assert_eq!(&string[..], "ab");
    /// ```
    pub fn push(&mut self, c: char) {
        self.try_push(c).unwrap();
    }

    /// Adds the given char to the end of the string.
    ///
    /// Returns `Ok` if the push succeeds.
    ///
    /// **Errors** if the backing array is not large enough to fit the additional char.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut string = ArrayString::<[_; 2]>::new();
    ///
    /// string.try_push('a').unwrap();
    /// string.try_push('b').unwrap();
    /// let overflow = string.try_push('c');
    ///
    /// assert_eq!(&string[..], "ab");
    /// assert_eq!(overflow.unwrap_err().element(), 'c');
    /// ```
    pub fn try_push(&mut self, c: char) -> Result<(), CapacityError<char>> {
        let len = self.len();
        unsafe {
            // encode_utf8 writes into the unused tail and reports the byte
            // count, or errors if the tail is too short — no partial write
            // is committed because set_len only runs on success.
            match encode_utf8(c, &mut self.raw_mut_bytes()[len..]) {
                Ok(n) => {
                    self.set_len(len + n);
                    Ok(())
                }
                Err(_) => Err(CapacityError::new(c)),
            }
        }
    }

    /// Adds the given string slice to the end of the string.
    ///
    /// ***Panics*** if the backing array is not large enough to fit the string.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut string = ArrayString::<[_; 2]>::new();
    ///
    /// string.push_str("a");
    /// string.push_str("d");
    ///
    /// assert_eq!(&string[..], "ad");
    /// ```
    pub fn push_str(&mut self, s: &str) {
        self.try_push_str(s).unwrap()
    }

    /// Adds the given string slice to the end of the string.
    ///
    /// Returns `Ok` if the push succeeds.
    ///
    /// **Errors** if the backing array is not large enough to fit the string.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut string = ArrayString::<[_; 2]>::new();
    ///
    /// string.try_push_str("a").unwrap();
    /// let overflow1 = string.try_push_str("bc");
    /// string.try_push_str("d").unwrap();
    /// let overflow2 = string.try_push_str("ef");
    ///
    /// assert_eq!(&string[..], "ad");
    /// assert_eq!(overflow1.unwrap_err().element(), "bc");
    /// assert_eq!(overflow2.unwrap_err().element(), "ef");
    /// ```
    pub fn try_push_str<'a>(&mut self, s: &'a str) -> Result<(), CapacityError<&'a str>> {
        if s.len() > self.capacity() - self.len() {
            return Err(CapacityError::new(s));
        }
        unsafe {
            // Capacity was checked above; src is a &str (valid UTF-8) and
            // cannot overlap our private buffer, so copy_nonoverlapping is
            // sound and the UTF-8 invariant is preserved.
            let dst = self.xs.as_mut_ptr().offset(self.len() as isize);
            let src = s.as_ptr();
            ptr::copy_nonoverlapping(src, dst, s.len());
            let newl = self.len() + s.len();
            self.set_len(newl);
        }
        Ok(())
    }

    /// Removes the last character from the string and returns it.
    ///
    /// Returns `None` if this `ArrayString` is empty.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut s = ArrayString::<[_; 3]>::from("foo").unwrap();
    ///
    /// assert_eq!(s.pop(), Some('o'));
    /// assert_eq!(s.pop(), Some('o'));
    /// assert_eq!(s.pop(), Some('f'));
    ///
    /// assert_eq!(s.pop(), None);
    /// ```
    #[inline]
    pub fn pop(&mut self) -> Option<char> {
        let ch = match self.chars().rev().next() {
            Some(ch) => ch,
            None => return None,
        };
        // Shrink by the char's encoded width; the bytes stay in the buffer
        // but are no longer part of the string.
        let new_len = self.len() - ch.len_utf8();
        unsafe {
            self.set_len(new_len);
        }
        Some(ch)
    }

    /// Shortens this `ArrayString` to the specified length.
    ///
    /// If `new_len` is greater than the string’s current length, this has no
    /// effect.
    ///
    /// ***Panics*** if `new_len` does not lie on a `char` boundary.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut string = ArrayString::<[_; 6]>::from("foobar").unwrap();
    /// string.truncate(3);
    /// assert_eq!(&string[..], "foo");
    /// string.truncate(4);
    /// assert_eq!(&string[..], "foo");
    /// ```
    #[inline]
    pub fn truncate(&mut self, new_len: usize) {
        if new_len <= self.len() {
            assert!(self.is_char_boundary(new_len));
            unsafe {
                // In libstd truncate is called on the underlying vector,
                // which in turns drops each element.
                // As we know we don't have to worry about Drop,
                // we can just set the length (a la clear.)
                self.set_len(new_len);
            }
        }
    }

    /// Removes a `char` from this `ArrayString` at a byte position and returns it.
    ///
    /// This is an `O(n)` operation, as it requires copying every element in the
    /// array.
    ///
    /// ***Panics*** if `idx` is larger than or equal to the `ArrayString`’s length,
    /// or if it does not lie on a `char` boundary.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut s = ArrayString::<[_; 3]>::from("foo").unwrap();
    ///
    /// assert_eq!(s.remove(0), 'f');
    /// assert_eq!(s.remove(1), 'o');
    /// assert_eq!(s.remove(0), 'o');
    /// ```
    #[inline]
    pub fn remove(&mut self, idx: usize) -> char {
        // Slicing at idx panics if idx is out of bounds or mid-char.
        let ch = match self[idx..].chars().next() {
            Some(ch) => ch,
            None => panic!("cannot remove a char from the end of a string"),
        };

        let next = idx + ch.len_utf8();
        let len = self.len();
        unsafe {
            // Shift the tail left over the removed char; ptr::copy handles
            // the overlapping ranges.
            ptr::copy(self.xs.as_ptr().offset(next as isize),
                      self.xs.as_mut_ptr().offset(idx as isize),
                      len - next);
            self.set_len(len - (next - idx));
        }
        ch
    }

    /// Make the string empty.
    pub fn clear(&mut self) {
        unsafe {
            self.set_len(0);
        }
    }

    /// Set the strings’s length.
    ///
    /// This function is `unsafe` because it changes the notion of the
    /// number of “valid” bytes in the string. Use with care.
    ///
    /// This method uses *debug assertions* to check the validity of `length`
    /// and may use other debug assertions.
    #[inline]
    pub unsafe fn set_len(&mut self, length: usize) {
        debug_assert!(length <= self.capacity());
        self.len = Index::from(length);
    }

    /// Return a string slice of the whole `ArrayString`.
    pub fn as_str(&self) -> &str {
        // Goes through Deref, which carries the UTF-8 invariant.
        self
    }

    /// Return a mutable slice of the whole string’s buffer
    // Unsafe: exposes bytes past `len`, whose contents are unspecified;
    // callers must not assume them to be valid UTF-8.
    unsafe fn raw_mut_bytes(&mut self) -> &mut [u8] {
        slice::from_raw_parts_mut(self.xs.as_mut_ptr(), self.capacity())
    }
}
|
||||
|
||||
impl<A: Array<Item=u8>> Deref for ArrayString<A> {
    type Target = str;
    #[inline]
    fn deref(&self) -> &str {
        unsafe {
            // Sound per the struct invariant: bytes 0..len are valid UTF-8.
            let sl = slice::from_raw_parts(self.xs.as_ptr(), self.len.to_usize());
            str::from_utf8_unchecked(sl)
        }
    }
}

impl<A: Array<Item=u8>> DerefMut for ArrayString<A> {
    #[inline]
    fn deref_mut(&mut self) -> &mut str {
        unsafe {
            let sl = slice::from_raw_parts_mut(self.xs.as_mut_ptr(), self.len.to_usize());
            // FIXME: Nothing but transmute to do this right now
            // (str::from_utf8_unchecked_mut was not yet available).
            mem::transmute(sl)
        }
    }
}

// Equality delegates to the underlying &str in all three directions.
impl<A: Array<Item=u8>> PartialEq for ArrayString<A> {
    fn eq(&self, rhs: &Self) -> bool {
        **self == **rhs
    }
}

impl<A: Array<Item=u8>> PartialEq<str> for ArrayString<A> {
    fn eq(&self, rhs: &str) -> bool {
        &**self == rhs
    }
}

impl<A: Array<Item=u8>> PartialEq<ArrayString<A>> for str {
    fn eq(&self, rhs: &ArrayString<A>) -> bool {
        self == &**rhs
    }
}

impl<A: Array<Item=u8>> Eq for ArrayString<A> { }

// Hash must agree with Eq, so hash the &str view (not the raw buffer,
// whose tail bytes are unspecified).
impl<A: Array<Item=u8>> Hash for ArrayString<A> {
    fn hash<H: Hasher>(&self, h: &mut H) {
        (**self).hash(h)
    }
}

impl<A: Array<Item=u8>> Borrow<str> for ArrayString<A> {
    fn borrow(&self) -> &str { self }
}

impl<A: Array<Item=u8>> AsRef<str> for ArrayString<A> {
    fn as_ref(&self) -> &str { self }
}

impl<A: Array<Item=u8>> fmt::Debug for ArrayString<A> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { (**self).fmt(f) }
}

impl<A: Array<Item=u8>> fmt::Display for ArrayString<A> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { (**self).fmt(f) }
}

/// `Write` appends written data to the end of the string.
impl<A: Array<Item=u8>> fmt::Write for ArrayString<A> {
    fn write_char(&mut self, c: char) -> fmt::Result {
        // Capacity overflow is surfaced as a generic fmt::Error.
        self.try_push(c).map_err(|_| fmt::Error)
    }

    fn write_str(&mut self, s: &str) -> fmt::Result {
        self.try_push_str(s).map_err(|_| fmt::Error)
    }
}
|
||||
|
||||
impl<A: Array<Item=u8> + Copy> Clone for ArrayString<A> {
    fn clone(&self) -> ArrayString<A> {
        // Bitwise copy is correct: the buffer and length are both Copy.
        *self
    }
    fn clone_from(&mut self, rhs: &Self) {
        // guaranteed to fit due to types matching.
        self.clear();
        self.try_push_str(rhs).ok();
    }
}

// Ordering delegates to the &str view in all three directions; the
// comparison operators are forwarded explicitly to avoid an Option
// round-trip through partial_cmp.
impl<A: Array<Item=u8>> PartialOrd for ArrayString<A> {
    fn partial_cmp(&self, rhs: &Self) -> Option<cmp::Ordering> {
        (**self).partial_cmp(&**rhs)
    }
    fn lt(&self, rhs: &Self) -> bool { **self < **rhs }
    fn le(&self, rhs: &Self) -> bool { **self <= **rhs }
    fn gt(&self, rhs: &Self) -> bool { **self > **rhs }
    fn ge(&self, rhs: &Self) -> bool { **self >= **rhs }
}

impl<A: Array<Item=u8>> PartialOrd<str> for ArrayString<A> {
    fn partial_cmp(&self, rhs: &str) -> Option<cmp::Ordering> {
        (**self).partial_cmp(rhs)
    }
    fn lt(&self, rhs: &str) -> bool { &**self < rhs }
    fn le(&self, rhs: &str) -> bool { &**self <= rhs }
    fn gt(&self, rhs: &str) -> bool { &**self > rhs }
    fn ge(&self, rhs: &str) -> bool { &**self >= rhs }
}

impl<A: Array<Item=u8>> PartialOrd<ArrayString<A>> for str {
    fn partial_cmp(&self, rhs: &ArrayString<A>) -> Option<cmp::Ordering> {
        self.partial_cmp(&**rhs)
    }
    fn lt(&self, rhs: &ArrayString<A>) -> bool { self < &**rhs }
    fn le(&self, rhs: &ArrayString<A>) -> bool { self <= &**rhs }
    fn gt(&self, rhs: &ArrayString<A>) -> bool { self > &**rhs }
    fn ge(&self, rhs: &ArrayString<A>) -> bool { self >= &**rhs }
}

impl<A: Array<Item=u8>> Ord for ArrayString<A> {
    fn cmp(&self, rhs: &Self) -> cmp::Ordering {
        (**self).cmp(&**rhs)
    }
}

#[cfg(feature="serde-1")]
/// Requires crate feature `"serde-1"`
impl<A: Array<Item=u8>> Serialize for ArrayString<A> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where S: Serializer
    {
        // Serializes as a plain string.
        serializer.serialize_str(&*self)
    }
}

#[cfg(feature="serde-1")]
/// Requires crate feature `"serde-1"`
impl<'de, A: Array<Item=u8>> Deserialize<'de> for ArrayString<A> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where D: Deserializer<'de>
    {
        use serde::de::{self, Visitor};
        use std::marker::PhantomData;

        // Zero-sized visitor; PhantomData only carries the target type.
        struct ArrayStringVisitor<A: Array<Item=u8>>(PhantomData<A>);

        impl<'de, A: Array<Item=u8>> Visitor<'de> for ArrayStringVisitor<A> {
            type Value = ArrayString<A>;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                write!(formatter, "a string no more than {} bytes long", A::capacity())
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
                where E: de::Error,
            {
                // Oversized input becomes an invalid_length error.
                ArrayString::from(v).map_err(|_| E::invalid_length(v.len(), &self))
            }

            fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
                where E: de::Error,
            {
                // Bytes must first be valid UTF-8, then fit the capacity.
                let s = try!(str::from_utf8(v).map_err(|_| E::invalid_value(de::Unexpected::Bytes(v), &self)));

                ArrayString::from(s).map_err(|_| E::invalid_length(s.len(), &self))
            }
        }

        deserializer.deserialize_str(ArrayStringVisitor::<A>(PhantomData))
    }
}
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
//
|
||||
// Original authors: alexchrichton, bluss
|
||||
|
||||
// UTF-8 ranges and tags for encoding characters
|
||||
// UTF-8 ranges and tags for encoding characters
const TAG_CONT: u8 = 0b1000_0000;
const TAG_TWO_B: u8 = 0b1100_0000;
const TAG_THREE_B: u8 = 0b1110_0000;
const TAG_FOUR_B: u8 = 0b1111_0000;
const MAX_ONE_B: u32 = 0x80;
const MAX_TWO_B: u32 = 0x800;
const MAX_THREE_B: u32 = 0x10000;

/// Placeholder
pub struct EncodeUtf8Error;

/// Encode a char into buf using UTF-8.
///
/// On success, return the byte length of the encoding (1, 2, 3 or 4).<br>
/// On error, return `EncodeUtf8Error` if the buffer was too short for the char.
#[inline]
pub fn encode_utf8(ch: char, buf: &mut [u8]) -> Result<usize, EncodeUtf8Error>
{
    let code = ch as u32;
    let avail = buf.len();
    // Pick the shortest encoding wide enough for the scalar value; each arm
    // also requires the buffer to hold that many bytes.
    if code < MAX_ONE_B && avail >= 1 {
        // ASCII: the scalar value is the byte.
        buf[0] = code as u8;
        Ok(1)
    } else if code < MAX_TWO_B && avail >= 2 {
        buf[0] = TAG_TWO_B | (code >> 6 & 0x1F) as u8;
        buf[1] = TAG_CONT | (code & 0x3F) as u8;
        Ok(2)
    } else if code < MAX_THREE_B && avail >= 3 {
        buf[0] = TAG_THREE_B | (code >> 12 & 0x0F) as u8;
        buf[1] = TAG_CONT | (code >> 6 & 0x3F) as u8;
        buf[2] = TAG_CONT | (code & 0x3F) as u8;
        Ok(3)
    } else if avail >= 4 {
        buf[0] = TAG_FOUR_B | (code >> 18 & 0x07) as u8;
        buf[1] = TAG_CONT | (code >> 12 & 0x3F) as u8;
        buf[2] = TAG_CONT | (code >> 6 & 0x3F) as u8;
        buf[3] = TAG_CONT | (code & 0x3F) as u8;
        Ok(4)
    } else {
        // Buffer too short for this char's encoding.
        Err(EncodeUtf8Error)
    }
}
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
use std::fmt;
|
||||
#[cfg(feature="std")]
|
||||
use std::any::Any;
|
||||
#[cfg(feature="std")]
|
||||
use std::error::Error;
|
||||
|
||||
/// Error value indicating insufficient capacity.
///
/// Carries the element that could not be inserted; `T` defaults to `()`
/// for the element-less form produced by [`simplify`](#method.simplify).
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)]
pub struct CapacityError<T = ()> {
    element: T,
}

impl<T> CapacityError<T> {
    /// Create a new `CapacityError` from `element`.
    pub fn new(element: T) -> CapacityError<T> {
        CapacityError { element }
    }

    /// Extract the overflowing element
    pub fn element(self) -> T {
        self.element
    }

    /// Convert into a `CapacityError` that does not carry an element.
    pub fn simplify(self) -> CapacityError {
        CapacityError::new(())
    }
}
|
||||
|
||||
// Shared message for all CapacityError formatting; the carried element is
// deliberately not shown (it may not implement Display/Debug).
const CAPERROR: &'static str = "insufficient capacity";

#[cfg(feature="std")]
/// Requires `features="std"`.
impl<T: Any> Error for CapacityError<T> {
    fn description(&self) -> &str {
        CAPERROR
    }
}

impl<T> fmt::Display for CapacityError<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", CAPERROR)
    }
}

// Manual Debug (instead of derive) so T need not implement Debug.
impl<T> fmt::Debug for CapacityError<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}: {}", "CapacityError", CAPERROR)
    }
}
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,46 @@
|
|||
|
||||
|
||||
use array::Array;
|
||||
use std::mem::ManuallyDrop;
|
||||
|
||||
/// A combination of ManuallyDrop and “maybe uninitialized”;
/// this wraps a value that can be wholly or partially uninitialized;
/// it also has no drop regardless of the type of T.
#[repr(C)] // for cast from self ptr to value
pub union MaybeUninit<T> {
    // Zero-sized variant: constructing it initializes nothing.
    empty: (),
    // ManuallyDrop suppresses T's destructor.
    value: ManuallyDrop<T>,
}
// Why we don't use std's MaybeUninit on nightly? See the ptr method

impl<T> MaybeUninit<T> {
    /// Create a new MaybeUninit with uninitialized interior
    // Unsafe: the caller takes responsibility for never reading the value
    // before initializing it.
    pub unsafe fn uninitialized() -> Self {
        MaybeUninit { empty: () }
    }

    /// Create a new MaybeUninit from the value `v`.
    pub fn from(v: T) -> Self {
        MaybeUninit { value: ManuallyDrop::new(v) }
    }

    // Raw pointer casts written so that we don't reference or access the
    // uninitialized interior value

    /// Return a raw pointer to the start of the interior array
    pub fn ptr(&self) -> *const T::Item
        where T: Array
    {
        // std MaybeUninit creates a &self.value reference here which is
        // not guaranteed to be sound in our case - we will partially
        // initialize the value, not always wholly.
        // repr(C) makes the union's address coincide with the value's.
        self as *const _ as *const T::Item
    }

    /// Return a mut raw pointer to the start of the interior array
    pub fn ptr_mut(&mut self) -> *mut T::Item
        where T: Array
    {
        self as *mut _ as *mut T::Item
    }
}
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
use array::Array;
|
||||
use nodrop::NoDrop;
|
||||
use std::mem::uninitialized;
|
||||
|
||||
/// A combination of NoDrop and “maybe uninitialized”;
|
||||
/// this wraps a value that can be wholly or partially uninitialized.
|
||||
///
|
||||
/// NOTE: This is known to not be a good solution, but it's the one we have kept
|
||||
/// working on stable Rust. Stable improvements are encouraged, in any form,
|
||||
/// but of course we are waiting for a real, stable, MaybeUninit.
|
||||
pub struct MaybeUninit<T>(NoDrop<T>);
|
||||
// why don't we use ManuallyDrop here: It doesn't inhibit
|
||||
// enum layout optimizations that depend on T, and we support older Rust.
|
||||
|
||||
impl<T> MaybeUninit<T> {
|
||||
/// Create a new MaybeUninit with uninitialized interior
|
||||
pub unsafe fn uninitialized() -> Self {
|
||||
Self::from(uninitialized())
|
||||
}
|
||||
|
||||
/// Create a new MaybeUninit from the value `v`.
|
||||
pub fn from(v: T) -> Self {
|
||||
MaybeUninit(NoDrop::new(v))
|
||||
}
|
||||
|
||||
/// Return a raw pointer to the start of the interior array
|
||||
pub fn ptr(&self) -> *const T::Item
|
||||
where T: Array
|
||||
{
|
||||
&*self.0 as *const T as *const _
|
||||
}
|
||||
|
||||
/// Return a mut raw pointer to the start of the interior array
|
||||
pub fn ptr_mut(&mut self) -> *mut T::Item
|
||||
where T: Array
|
||||
{
|
||||
&mut *self.0 as *mut T as *mut _
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
|
||||
use array::Array;
|
||||
use std::mem::MaybeUninit as StdMaybeUninit;
|
||||
|
||||
pub struct MaybeUninit<T> {
|
||||
inner: StdMaybeUninit<T>,
|
||||
}
|
||||
|
||||
impl<T> MaybeUninit<T> {
|
||||
/// Create a new MaybeUninit with uninitialized interior
|
||||
pub unsafe fn uninitialized() -> Self {
|
||||
MaybeUninit { inner: StdMaybeUninit::uninit() }
|
||||
}
|
||||
|
||||
/// Create a new MaybeUninit from the value `v`.
|
||||
pub fn from(v: T) -> Self {
|
||||
MaybeUninit { inner: StdMaybeUninit::new(v) }
|
||||
}
|
||||
|
||||
// Raw pointer casts written so that we don't reference or access the
|
||||
// uninitialized interior value
|
||||
|
||||
/// Return a raw pointer to the start of the interior array
|
||||
pub fn ptr(&self) -> *const T::Item
|
||||
where T: Array
|
||||
{
|
||||
// std MaybeUninit creates a &self.value reference here which is
|
||||
// not guaranteed to be sound in our case - we will partially
|
||||
// initialize the value, not always wholly.
|
||||
self.inner.as_ptr() as *const T::Item
|
||||
}
|
||||
|
||||
/// Return a mut raw pointer to the start of the interior array
|
||||
pub fn ptr_mut(&mut self) -> *mut T::Item
|
||||
where T: Array
|
||||
{
|
||||
self.inner.as_mut_ptr() as *mut T::Item
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
|
||||
use std::ops::{
|
||||
RangeFull,
|
||||
RangeFrom,
|
||||
RangeTo,
|
||||
Range,
|
||||
};
|
||||
|
||||
/// `RangeArgument` is implemented by Rust's built-in range types, produced
|
||||
/// by range syntax like `..`, `a..`, `..b` or `c..d`.
|
||||
///
|
||||
/// Note: This is arrayvec's provisional trait, waiting for stable Rust to
|
||||
/// provide an equivalent.
|
||||
pub trait RangeArgument {
|
||||
#[inline]
|
||||
/// Start index (inclusive)
|
||||
fn start(&self) -> Option<usize> { None }
|
||||
#[inline]
|
||||
/// End index (exclusive)
|
||||
fn end(&self) -> Option<usize> { None }
|
||||
}
|
||||
|
||||
|
||||
impl RangeArgument for RangeFull {}
|
||||
|
||||
impl RangeArgument for RangeFrom<usize> {
|
||||
#[inline]
|
||||
fn start(&self) -> Option<usize> { Some(self.start) }
|
||||
}
|
||||
|
||||
impl RangeArgument for RangeTo<usize> {
|
||||
#[inline]
|
||||
fn end(&self) -> Option<usize> { Some(self.end) }
|
||||
}
|
||||
|
||||
impl RangeArgument for Range<usize> {
|
||||
#[inline]
|
||||
fn start(&self) -> Option<usize> { Some(self.start) }
|
||||
#[inline]
|
||||
fn end(&self) -> Option<usize> { Some(self.end) }
|
||||
}
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
#![cfg(feature = "serde-1")]
|
||||
extern crate arrayvec;
|
||||
extern crate serde_test;
|
||||
|
||||
mod array_vec {
|
||||
use arrayvec::ArrayVec;
|
||||
|
||||
use serde_test::{Token, assert_tokens, assert_de_tokens_error};
|
||||
|
||||
#[test]
|
||||
fn test_ser_de_empty() {
|
||||
let vec = ArrayVec::<[u32; 0]>::new();
|
||||
|
||||
assert_tokens(&vec, &[
|
||||
Token::Seq { len: Some(0) },
|
||||
Token::SeqEnd,
|
||||
]);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_ser_de() {
|
||||
let mut vec = ArrayVec::<[u32; 3]>::new();
|
||||
vec.push(20);
|
||||
vec.push(55);
|
||||
vec.push(123);
|
||||
|
||||
assert_tokens(&vec, &[
|
||||
Token::Seq { len: Some(3) },
|
||||
Token::U32(20),
|
||||
Token::U32(55),
|
||||
Token::U32(123),
|
||||
Token::SeqEnd,
|
||||
]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_de_too_large() {
|
||||
assert_de_tokens_error::<ArrayVec<[u32; 2]>>(&[
|
||||
Token::Seq { len: Some(3) },
|
||||
Token::U32(13),
|
||||
Token::U32(42),
|
||||
Token::U32(68),
|
||||
], "invalid length 3, expected an array with no more than 2 items");
|
||||
}
|
||||
}
|
||||
|
||||
mod array_string {
|
||||
use arrayvec::ArrayString;
|
||||
|
||||
use serde_test::{Token, assert_tokens, assert_de_tokens_error};
|
||||
|
||||
#[test]
|
||||
fn test_ser_de_empty() {
|
||||
let string = ArrayString::<[u8; 0]>::new();
|
||||
|
||||
assert_tokens(&string, &[
|
||||
Token::Str(""),
|
||||
]);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_ser_de() {
|
||||
let string = ArrayString::<[u8; 9]>::from("1234 abcd")
|
||||
.expect("expected exact specified capacity to be enough");
|
||||
|
||||
assert_tokens(&string, &[
|
||||
Token::Str("1234 abcd"),
|
||||
]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_de_too_large() {
|
||||
assert_de_tokens_error::<ArrayString<[u8; 2]>>(&[
|
||||
Token::Str("afd")
|
||||
], "invalid length 3, expected a string no more than 2 bytes long");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,517 @@
|
|||
extern crate arrayvec;
|
||||
#[macro_use] extern crate matches;
|
||||
|
||||
use arrayvec::ArrayVec;
|
||||
use arrayvec::ArrayString;
|
||||
use std::mem;
|
||||
use arrayvec::CapacityError;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_simple() {
|
||||
use std::ops::Add;
|
||||
|
||||
let mut vec: ArrayVec<[Vec<i32>; 3]> = ArrayVec::new();
|
||||
|
||||
vec.push(vec![1, 2, 3, 4]);
|
||||
vec.push(vec![10]);
|
||||
vec.push(vec![-1, 13, -2]);
|
||||
|
||||
for elt in &vec {
|
||||
assert_eq!(elt.iter().fold(0, Add::add), 10);
|
||||
}
|
||||
|
||||
let sum_len = vec.into_iter().map(|x| x.len()).fold(0, Add::add);
|
||||
assert_eq!(sum_len, 8);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_u16_index() {
|
||||
const N: usize = 4096;
|
||||
let mut vec: ArrayVec<[_; N]> = ArrayVec::new();
|
||||
for _ in 0..N {
|
||||
assert!(vec.try_push(1u8).is_ok());
|
||||
}
|
||||
assert!(vec.try_push(0).is_err());
|
||||
assert_eq!(vec.len(), N);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iter() {
|
||||
let mut iter = ArrayVec::from([1, 2, 3]).into_iter();
|
||||
assert_eq!(iter.size_hint(), (3, Some(3)));
|
||||
assert_eq!(iter.next_back(), Some(3));
|
||||
assert_eq!(iter.next(), Some(1));
|
||||
assert_eq!(iter.next_back(), Some(2));
|
||||
assert_eq!(iter.size_hint(), (0, Some(0)));
|
||||
assert_eq!(iter.next_back(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_drop() {
|
||||
use std::cell::Cell;
|
||||
|
||||
let flag = &Cell::new(0);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Bump<'a>(&'a Cell<i32>);
|
||||
|
||||
impl<'a> Drop for Bump<'a> {
|
||||
fn drop(&mut self) {
|
||||
let n = self.0.get();
|
||||
self.0.set(n + 1);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let mut array = ArrayVec::<[Bump; 128]>::new();
|
||||
array.push(Bump(flag));
|
||||
array.push(Bump(flag));
|
||||
}
|
||||
assert_eq!(flag.get(), 2);
|
||||
|
||||
// test something with the nullable pointer optimization
|
||||
flag.set(0);
|
||||
|
||||
{
|
||||
let mut array = ArrayVec::<[_; 3]>::new();
|
||||
array.push(vec![Bump(flag)]);
|
||||
array.push(vec![Bump(flag), Bump(flag)]);
|
||||
array.push(vec![]);
|
||||
let push4 = array.try_push(vec![Bump(flag)]);
|
||||
assert_eq!(flag.get(), 0);
|
||||
drop(push4);
|
||||
assert_eq!(flag.get(), 1);
|
||||
drop(array.pop());
|
||||
assert_eq!(flag.get(), 1);
|
||||
drop(array.pop());
|
||||
assert_eq!(flag.get(), 3);
|
||||
}
|
||||
|
||||
assert_eq!(flag.get(), 4);
|
||||
|
||||
// test into_inner
|
||||
flag.set(0);
|
||||
{
|
||||
let mut array = ArrayVec::<[_; 3]>::new();
|
||||
array.push(Bump(flag));
|
||||
array.push(Bump(flag));
|
||||
array.push(Bump(flag));
|
||||
let inner = array.into_inner();
|
||||
assert!(inner.is_ok());
|
||||
assert_eq!(flag.get(), 0);
|
||||
drop(inner);
|
||||
assert_eq!(flag.get(), 3);
|
||||
}
|
||||
|
||||
// test cloning into_iter
|
||||
flag.set(0);
|
||||
{
|
||||
let mut array = ArrayVec::<[_; 3]>::new();
|
||||
array.push(Bump(flag));
|
||||
array.push(Bump(flag));
|
||||
array.push(Bump(flag));
|
||||
let mut iter = array.into_iter();
|
||||
assert_eq!(flag.get(), 0);
|
||||
iter.next();
|
||||
assert_eq!(flag.get(), 1);
|
||||
let clone = iter.clone();
|
||||
assert_eq!(flag.get(), 1);
|
||||
drop(clone);
|
||||
assert_eq!(flag.get(), 3);
|
||||
drop(iter);
|
||||
assert_eq!(flag.get(), 5);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extend() {
|
||||
let mut range = 0..10;
|
||||
|
||||
let mut array: ArrayVec<[_; 5]> = range.by_ref().collect();
|
||||
assert_eq!(&array[..], &[0, 1, 2, 3, 4]);
|
||||
assert_eq!(range.next(), Some(5));
|
||||
|
||||
array.extend(range.by_ref());
|
||||
assert_eq!(range.next(), Some(6));
|
||||
|
||||
let mut array: ArrayVec<[_; 10]> = (0..3).collect();
|
||||
assert_eq!(&array[..], &[0, 1, 2]);
|
||||
array.extend(3..5);
|
||||
assert_eq!(&array[..], &[0, 1, 2, 3, 4]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_send_sync() {
|
||||
let data = ArrayVec::<[Vec<i32>; 5]>::new();
|
||||
&data as &Send;
|
||||
&data as &Sync;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compact_size() {
|
||||
// Future rust will kill these drop flags!
|
||||
// 4 elements size + 1 len + 1 enum tag + [1 drop flag]
|
||||
type ByteArray = ArrayVec<[u8; 4]>;
|
||||
println!("{}", mem::size_of::<ByteArray>());
|
||||
assert!(mem::size_of::<ByteArray>() <= 8);
|
||||
|
||||
// 12 element size + 1 enum tag + 3 padding + 1 len + 1 drop flag + 2 padding
|
||||
type QuadArray = ArrayVec<[u32; 3]>;
|
||||
println!("{}", mem::size_of::<QuadArray>());
|
||||
assert!(mem::size_of::<QuadArray>() <= 24);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_still_works_with_option_arrayvec() {
|
||||
type RefArray = ArrayVec<[&'static i32; 2]>;
|
||||
let array = Some(RefArray::new());
|
||||
assert!(array.is_some());
|
||||
println!("{:?}", array);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_drain() {
|
||||
let mut v = ArrayVec::from([0; 8]);
|
||||
v.pop();
|
||||
v.drain(0..7);
|
||||
assert_eq!(&v[..], &[]);
|
||||
|
||||
v.extend(0..);
|
||||
v.drain(1..4);
|
||||
assert_eq!(&v[..], &[0, 4, 5, 6, 7]);
|
||||
let u: ArrayVec<[_; 3]> = v.drain(1..4).rev().collect();
|
||||
assert_eq!(&u[..], &[6, 5, 4]);
|
||||
assert_eq!(&v[..], &[0, 7]);
|
||||
v.drain(..);
|
||||
assert_eq!(&v[..], &[]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_retain() {
|
||||
let mut v = ArrayVec::from([0; 8]);
|
||||
for (i, elt) in v.iter_mut().enumerate() {
|
||||
*elt = i;
|
||||
}
|
||||
v.retain(|_| true);
|
||||
assert_eq!(&v[..], &[0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
v.retain(|elt| {
|
||||
*elt /= 2;
|
||||
*elt % 2 == 0
|
||||
});
|
||||
assert_eq!(&v[..], &[0, 0, 2, 2]);
|
||||
v.retain(|_| false);
|
||||
assert_eq!(&v[..], &[]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_drain_oob() {
|
||||
let mut v = ArrayVec::from([0; 8]);
|
||||
v.pop();
|
||||
v.drain(0..8);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_drop_panic() {
|
||||
struct DropPanic;
|
||||
|
||||
impl Drop for DropPanic {
|
||||
fn drop(&mut self) {
|
||||
panic!("drop");
|
||||
}
|
||||
}
|
||||
|
||||
let mut array = ArrayVec::<[DropPanic; 1]>::new();
|
||||
array.push(DropPanic);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_drop_panic_into_iter() {
|
||||
struct DropPanic;
|
||||
|
||||
impl Drop for DropPanic {
|
||||
fn drop(&mut self) {
|
||||
panic!("drop");
|
||||
}
|
||||
}
|
||||
|
||||
let mut array = ArrayVec::<[DropPanic; 1]>::new();
|
||||
array.push(DropPanic);
|
||||
array.into_iter();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert() {
|
||||
let mut v = ArrayVec::from([]);
|
||||
assert_matches!(v.try_push(1), Err(_));
|
||||
|
||||
let mut v = ArrayVec::<[_; 3]>::new();
|
||||
v.insert(0, 0);
|
||||
v.insert(1, 1);
|
||||
//let ret1 = v.try_insert(3, 3);
|
||||
//assert_matches!(ret1, Err(InsertError::OutOfBounds(_)));
|
||||
assert_eq!(&v[..], &[0, 1]);
|
||||
v.insert(2, 2);
|
||||
assert_eq!(&v[..], &[0, 1, 2]);
|
||||
|
||||
let ret2 = v.try_insert(1, 9);
|
||||
assert_eq!(&v[..], &[0, 1, 2]);
|
||||
assert_matches!(ret2, Err(_));
|
||||
|
||||
let mut v = ArrayVec::from([2]);
|
||||
assert_matches!(v.try_insert(0, 1), Err(CapacityError { .. }));
|
||||
assert_matches!(v.try_insert(1, 1), Err(CapacityError { .. }));
|
||||
//assert_matches!(v.try_insert(2, 1), Err(CapacityError { .. }));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_into_inner_1() {
|
||||
let mut v = ArrayVec::from([1, 2]);
|
||||
v.pop();
|
||||
let u = v.clone();
|
||||
assert_eq!(v.into_inner(), Err(u));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_into_inner_2() {
|
||||
let mut v = ArrayVec::<[String; 4]>::new();
|
||||
v.push("a".into());
|
||||
v.push("b".into());
|
||||
v.push("c".into());
|
||||
v.push("d".into());
|
||||
assert_eq!(v.into_inner().unwrap(), ["a", "b", "c", "d"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_into_inner_3_() {
|
||||
let mut v = ArrayVec::<[i32; 4]>::new();
|
||||
v.extend(1..);
|
||||
assert_eq!(v.into_inner().unwrap(), [1, 2, 3, 4]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_write() {
|
||||
use std::io::Write;
|
||||
let mut v = ArrayVec::<[_; 8]>::new();
|
||||
write!(&mut v, "\x01\x02\x03").unwrap();
|
||||
assert_eq!(&v[..], &[1, 2, 3]);
|
||||
let r = v.write(&[9; 16]).unwrap();
|
||||
assert_eq!(r, 5);
|
||||
assert_eq!(&v[..], &[1, 2, 3, 9, 9, 9, 9, 9]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn array_clone_from() {
|
||||
let mut v = ArrayVec::<[_; 4]>::new();
|
||||
v.push(vec![1, 2]);
|
||||
v.push(vec![3, 4, 5]);
|
||||
v.push(vec![6]);
|
||||
let reference = v.to_vec();
|
||||
let mut u = ArrayVec::<[_; 4]>::new();
|
||||
u.clone_from(&v);
|
||||
assert_eq!(&u, &reference[..]);
|
||||
|
||||
let mut t = ArrayVec::<[_; 4]>::new();
|
||||
t.push(vec![97]);
|
||||
t.push(vec![]);
|
||||
t.push(vec![5, 6, 2]);
|
||||
t.push(vec![2]);
|
||||
t.clone_from(&v);
|
||||
assert_eq!(&t, &reference[..]);
|
||||
t.clear();
|
||||
t.clone_from(&v);
|
||||
assert_eq!(&t, &reference[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string() {
|
||||
use std::error::Error;
|
||||
|
||||
let text = "hello world";
|
||||
let mut s = ArrayString::<[_; 16]>::new();
|
||||
s.try_push_str(text).unwrap();
|
||||
assert_eq!(&s, text);
|
||||
assert_eq!(text, &s);
|
||||
|
||||
// Make sure Hash / Eq / Borrow match up so we can use HashMap
|
||||
let mut map = HashMap::new();
|
||||
map.insert(s, 1);
|
||||
assert_eq!(map[text], 1);
|
||||
|
||||
let mut t = ArrayString::<[_; 2]>::new();
|
||||
assert!(t.try_push_str(text).is_err());
|
||||
assert_eq!(&t, "");
|
||||
|
||||
t.push_str("ab");
|
||||
// DerefMut
|
||||
let tmut: &mut str = &mut t;
|
||||
assert_eq!(tmut, "ab");
|
||||
|
||||
// Test Error trait / try
|
||||
let t = || -> Result<(), Box<Error>> {
|
||||
let mut t = ArrayString::<[_; 2]>::new();
|
||||
try!(t.try_push_str(text));
|
||||
Ok(())
|
||||
}();
|
||||
assert!(t.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_from() {
|
||||
let text = "hello world";
|
||||
// Test `from` constructor
|
||||
let u = ArrayString::<[_; 11]>::from(text).unwrap();
|
||||
assert_eq!(&u, text);
|
||||
assert_eq!(u.len(), text.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_from_bytes() {
|
||||
let text = "hello world";
|
||||
let u = ArrayString::from_byte_string(b"hello world").unwrap();
|
||||
assert_eq!(&u, text);
|
||||
assert_eq!(u.len(), text.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_clone() {
|
||||
let text = "hi";
|
||||
let mut s = ArrayString::<[_; 4]>::new();
|
||||
s.push_str("abcd");
|
||||
let t = ArrayString::<[_; 4]>::from(text).unwrap();
|
||||
s.clone_from(&t);
|
||||
assert_eq!(&t, &s);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_push() {
|
||||
let text = "abcαβγ";
|
||||
let mut s = ArrayString::<[_; 8]>::new();
|
||||
for c in text.chars() {
|
||||
if let Err(_) = s.try_push(c) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert_eq!("abcαβ", &s[..]);
|
||||
s.push('x');
|
||||
assert_eq!("abcαβx", &s[..]);
|
||||
assert!(s.try_push('x').is_err());
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_insert_at_length() {
|
||||
let mut v = ArrayVec::<[_; 8]>::new();
|
||||
let result1 = v.try_insert(0, "a");
|
||||
let result2 = v.try_insert(1, "b");
|
||||
assert!(result1.is_ok() && result2.is_ok());
|
||||
assert_eq!(&v[..], &["a", "b"]);
|
||||
}
|
||||
|
||||
#[should_panic]
|
||||
#[test]
|
||||
fn test_insert_out_of_bounds() {
|
||||
let mut v = ArrayVec::<[_; 8]>::new();
|
||||
let _ = v.try_insert(1, "test");
|
||||
}
|
||||
|
||||
/*
|
||||
* insert that pushes out the last
|
||||
let mut u = ArrayVec::from([1, 2, 3, 4]);
|
||||
let ret = u.try_insert(3, 99);
|
||||
assert_eq!(&u[..], &[1, 2, 3, 99]);
|
||||
assert_matches!(ret, Err(_));
|
||||
let ret = u.try_insert(4, 77);
|
||||
assert_eq!(&u[..], &[1, 2, 3, 99]);
|
||||
assert_matches!(ret, Err(_));
|
||||
*/
|
||||
|
||||
#[test]
|
||||
fn test_drop_in_insert() {
|
||||
use std::cell::Cell;
|
||||
|
||||
let flag = &Cell::new(0);
|
||||
|
||||
struct Bump<'a>(&'a Cell<i32>);
|
||||
|
||||
impl<'a> Drop for Bump<'a> {
|
||||
fn drop(&mut self) {
|
||||
let n = self.0.get();
|
||||
self.0.set(n + 1);
|
||||
}
|
||||
}
|
||||
|
||||
flag.set(0);
|
||||
|
||||
{
|
||||
let mut array = ArrayVec::<[_; 2]>::new();
|
||||
array.push(Bump(flag));
|
||||
array.insert(0, Bump(flag));
|
||||
assert_eq!(flag.get(), 0);
|
||||
let ret = array.try_insert(1, Bump(flag));
|
||||
assert_eq!(flag.get(), 0);
|
||||
assert_matches!(ret, Err(_));
|
||||
drop(ret);
|
||||
assert_eq!(flag.get(), 1);
|
||||
}
|
||||
assert_eq!(flag.get(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pop_at() {
|
||||
let mut v = ArrayVec::<[String; 4]>::new();
|
||||
let s = String::from;
|
||||
v.push(s("a"));
|
||||
v.push(s("b"));
|
||||
v.push(s("c"));
|
||||
v.push(s("d"));
|
||||
|
||||
assert_eq!(v.pop_at(4), None);
|
||||
assert_eq!(v.pop_at(1), Some(s("b")));
|
||||
assert_eq!(v.pop_at(1), Some(s("c")));
|
||||
assert_eq!(v.pop_at(2), None);
|
||||
assert_eq!(&v[..], &["a", "d"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sizes() {
|
||||
let v = ArrayVec::from([0u8; 1 << 16]);
|
||||
assert_eq!(vec![0u8; v.len()], &v[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_default() {
|
||||
use std::net;
|
||||
let s: ArrayString<[u8; 4]> = Default::default();
|
||||
// Something without `Default` implementation.
|
||||
let v: ArrayVec<[net::TcpStream; 4]> = Default::default();
|
||||
assert_eq!(s.len(), 0);
|
||||
assert_eq!(v.len(), 0);
|
||||
}
|
||||
|
||||
#[cfg(feature="array-sizes-33-128")]
|
||||
#[test]
|
||||
fn test_sizes_33_128() {
|
||||
ArrayVec::from([0u8; 52]);
|
||||
ArrayVec::from([0u8; 127]);
|
||||
}
|
||||
|
||||
#[cfg(feature="array-sizes-129-255")]
|
||||
#[test]
|
||||
fn test_sizes_129_255() {
|
||||
ArrayVec::from([0u8; 237]);
|
||||
ArrayVec::from([0u8; 255]);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_newish_stable_uses_maybe_uninit() {
|
||||
if option_env!("ARRAYVECTEST_ENSURE_MAYBEUNINIT").map(|s| !s.is_empty()).unwrap_or(false) {
|
||||
assert!(cfg!(has_stable_maybe_uninit));
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
{"files":{"Cargo.toml":"e7405a91fea075bb4fedb0e76e2039af27d6c380beaa31150f37655d79a7a3ab","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0245ee104228a100ce5fceecf43e25faae450494d9173f43fd94c27d69fdac13","README.rst":"8fab86c3c759d153a1a8a48e5f7f48546c898f0ec91433001c57fe0002af6455","benches/arraystring.rs":"f12b890977117ebde4ca42bcd6b91f2a6a087f2b235aaca6d15e30d125ae9f67","benches/extend.rs":"c3d69cc488ec5341b019cfed545ebbfea252f98718037b413f6a349da9489d1b","custom.css":"e6f2cd299392337b4e2959c52f422e5b7be11920ea98d10db44d10ddef5ed47c","src/array.rs":"8a42b3ff7a5a0713e8ee22462f303b0ce15bdc49a9fd5eb64f58e56855bdf944","src/array_string.rs":"fdcc24f0fd07e781b378f5d0190279e6d9c89b422f67e546ae443c602f967896","src/char.rs":"40af597d93895f206abcd33953b5d3d5a512d3b16ff5f96e492e659d9cca4209","src/errors.rs":"dde99bffaddfd45396aab7e07642cc018ef5435fe60c4f26a2c05a36555be18c","src/lib.rs":"4c00e50b532aec68b52fde4a737b7b5980b0cfb28f5c09ab8408d04896895a87","src/maybe_uninit.rs":"00659a86e8f84852d4355077a16beceaad0440ac0e81851fbac712fdb1850622","tests/serde.rs":"18c165cf6024f04a25b19aa139657d7c59f72d1541c9b24b44f9eaea01f507db","tests/tests.rs":"9633b92fe6c650b9b816cecac23b9c9e6a0365b1f67d4f0bfaad9e645e2bdc49"},"package":"cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"}
|
|
@ -0,0 +1,46 @@
|
|||
"""
|
||||
cargo-raze crate build file.
|
||||
|
||||
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||
"""
|
||||
package(default_visibility = [
|
||||
# Public for visibility by "@raze__crate__version//" targets.
|
||||
#
|
||||
# Prefer access through "//third_party/cargo", which limits external
|
||||
# visibility to explicit Cargo.toml dependencies.
|
||||
"//visibility:public",
|
||||
])
|
||||
|
||||
licenses([
|
||||
"notice", # "MIT,Apache-2.0"
|
||||
])
|
||||
|
||||
load(
|
||||
"@io_bazel_rules_rust//rust:rust.bzl",
|
||||
"rust_library",
|
||||
"rust_binary",
|
||||
"rust_test",
|
||||
)
|
||||
|
||||
|
||||
# Unsupported target "arraystring" with type "bench" omitted
|
||||
|
||||
rust_library(
|
||||
name = "arrayvec",
|
||||
crate_root = "src/lib.rs",
|
||||
crate_type = "lib",
|
||||
edition = "2018",
|
||||
srcs = glob(["**/*.rs"]),
|
||||
deps = [
|
||||
],
|
||||
rustc_flags = [
|
||||
"--cap-lints=allow",
|
||||
],
|
||||
version = "0.5.1",
|
||||
crate_features = [
|
||||
],
|
||||
)
|
||||
|
||||
# Unsupported target "extend" with type "bench" omitted
|
||||
# Unsupported target "serde" with type "test" omitted
|
||||
# Unsupported target "tests" with type "test" omitted
|
|
@ -0,0 +1,62 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "arrayvec"
|
||||
version = "0.5.1"
|
||||
authors = ["bluss"]
|
||||
description = "A vector with fixed capacity, backed by an array (it can be stored on the stack too). Implements fixed capacity ArrayVec and ArrayString."
|
||||
documentation = "https://docs.rs/arrayvec/"
|
||||
keywords = ["stack", "vector", "array", "data-structure", "no_std"]
|
||||
categories = ["data-structures", "no-std"]
|
||||
license = "MIT/Apache-2.0"
|
||||
repository = "https://github.com/bluss/arrayvec"
|
||||
[package.metadata.docs.rs]
|
||||
features = ["serde"]
|
||||
|
||||
[package.metadata.release]
|
||||
no-dev-version = true
|
||||
tag-name = "{{version}}"
|
||||
[profile.bench]
|
||||
debug = true
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
||||
[[bench]]
|
||||
name = "extend"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "arraystring"
|
||||
harness = false
|
||||
[dependencies.serde]
|
||||
version = "1.0"
|
||||
optional = true
|
||||
default-features = false
|
||||
[dev-dependencies.bencher]
|
||||
version = "0.1.4"
|
||||
|
||||
[dev-dependencies.matches]
|
||||
version = "0.1"
|
||||
|
||||
[dev-dependencies.serde_test]
|
||||
version = "1.0"
|
||||
|
||||
[build-dependencies]
|
||||
|
||||
[features]
|
||||
array-sizes-129-255 = []
|
||||
array-sizes-33-128 = []
|
||||
default = ["std"]
|
||||
std = []
|
|
@ -0,0 +1,201 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) Ulrik Sverdrup "bluss" 2015-2017
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,259 @@
|
|||
|
||||
arrayvec
|
||||
========
|
||||
|
||||
A vector with fixed capacity.
|
||||
|
||||
Please read the `API documentation here`__
|
||||
|
||||
__ https://docs.rs/arrayvec
|
||||
|
||||
|build_status|_ |crates|_ |crates2|_
|
||||
|
||||
.. |build_status| image:: https://travis-ci.org/bluss/arrayvec.svg
|
||||
.. _build_status: https://travis-ci.org/bluss/arrayvec
|
||||
|
||||
.. |crates| image:: http://meritbadge.herokuapp.com/arrayvec
|
||||
.. _crates: https://crates.io/crates/arrayvec
|
||||
|
||||
.. |crates2| image:: http://meritbadge.herokuapp.com/nodrop
|
||||
.. _crates2: https://crates.io/crates/nodrop
|
||||
|
||||
Recent Changes (arrayvec)
|
||||
-------------------------
|
||||
|
||||
- 0.5.1
|
||||
|
||||
- Add ``as_ptr``, ``as_mut_ptr`` accessors directly on the ``ArrayVec`` by @tbu-
|
||||
(matches the same addition to ``Vec`` which happened in Rust 1.37).
|
||||
- Add method ``ArrayString::len`` (now available directly, not just through deref to str).
|
||||
- Use raw pointers instead of ``&mut [u8]`` for encoding chars into ``ArrayString``
|
||||
(uninit best practice fix).
|
||||
- Use raw pointers instead of ``get_unchecked_mut`` where the target may be
|
||||
uninitialized, everywhere relevant in the ArrayVec implementation
|
||||
(uninit best practice fix).
|
||||
- Changed inline hints on many methods, mainly removing inline hints
|
||||
- ``ArrayVec::dispose`` is now deprecated (it has no purpose anymore)
|
||||
|
||||
- 0.4.12
|
||||
|
||||
- Use raw pointers instead of ``get_unchecked_mut`` where the target may be
|
||||
uninitialized, everywhere relevant in the ArrayVec implementation.
|
||||
|
||||
- 0.5.0
|
||||
|
||||
- Use ``MaybeUninit`` (now unconditionally) in the implementation of
|
||||
``ArrayVec``
|
||||
- Use ``MaybeUninit`` (now unconditionally) in the implementation of
|
||||
``ArrayString``
|
||||
- The crate feature for serde serialization is now named ``serde``.
|
||||
- Updated the ``Array`` trait interface, and it is now easier to use for
|
||||
users outside the crate.
|
||||
- Add ``FromStr`` impl for ``ArrayString`` by @despawnerer
|
||||
- Add method ``try_extend_from_slice`` to ``ArrayVec``, which is always
|
||||
efficient by @Thomasdezeeuw.
|
||||
- Add method ``remaining_capacity`` by @Thomasdezeeuw
|
||||
- Improve performance of the ``extend`` method.
|
||||
- The index type of zero capacity vectors is now itself zero size, by
|
||||
@clarfon
|
||||
- Use ``drop_in_place`` for truncate and clear methods. This affects drop order
|
||||
and resume from panic during drop.
|
||||
- Use Rust 2018 edition for the implementation
|
||||
- Require Rust 1.36 or later, for the unconditional ``MaybeUninit``
|
||||
improvements.
|
||||
|
||||
- 0.4.11
|
||||
|
||||
- In Rust 1.36 or later, use newly stable MaybeUninit. This extends the
|
||||
soundness work introduced in 0.4.9, we are finally able to use this in
|
||||
stable. We use feature detection (build script) to enable this at build
|
||||
time.
|
||||
|
||||
- 0.4.10
|
||||
|
||||
- Use ``repr(C)`` in the ``union`` version that was introduced in 0.4.9, to
|
||||
allay some soundness concerns.
|
||||
|
||||
- 0.4.9
|
||||
|
||||
- Use ``union`` in the implementation on when this is detected to be supported
|
||||
(nightly only for now). This is a better solution for treating uninitialized
|
||||
regions correctly, and we'll use it in stable Rust as soon as we are able.
|
||||
When this is enabled, the ``ArrayVec`` has no space overhead in its memory
|
||||
layout, although the size of the vec should not be relied upon. (See `#114`_)
|
||||
- ``ArrayString`` updated to not use uninitialized memory, it instead zeros its
|
||||
backing array. This will be refined in the next version, since we
|
||||
need to make changes to the user visible API.
|
||||
- The ``use_union`` feature now does nothing (like its documentation foretold).
|
||||
|
||||
.. _`#114`: https://github.com/bluss/arrayvec/pull/114
|
||||
|
||||
- 0.4.8
|
||||
|
||||
- Implement Clone and Debug for ``IntoIter`` by @clarcharr
|
||||
- Add more array sizes under crate features. These cover all in the range
|
||||
up to 128 and 129 to 255 respectively (we have a few of those by default):
|
||||
|
||||
- ``array-size-33-128``
|
||||
- ``array-size-129-255``
|
||||
|
||||
- 0.4.7
|
||||
|
||||
- Fix future compat warning about raw pointer casts
|
||||
- Use ``drop_in_place`` when dropping the arrayvec by-value iterator
|
||||
- Decrease minimum Rust version (see docs) by @jeehoonkang
|
||||
|
||||
- 0.3.25
|
||||
|
||||
- Fix future compat warning about raw pointer casts
|
||||
|
||||
- 0.4.6
|
||||
|
||||
- Fix compilation on 16-bit targets. This means, the 65536 array size is not
|
||||
included on these targets.
|
||||
|
||||
- 0.3.24
|
||||
|
||||
- Fix compilation on 16-bit targets. This means, the 65536 array size is not
|
||||
included on these targets.
|
||||
- Fix license files so that they are both included (was fixed in 0.4 before)
|
||||
|
||||
- 0.4.5
|
||||
|
||||
- Add methods to ``ArrayString`` by @DenialAdams:
|
||||
|
||||
- ``.pop() -> Option<char>``
|
||||
- ``.truncate(new_len)``
|
||||
- ``.remove(index) -> char``
|
||||
|
||||
- Remove dependency on crate odds
|
||||
- Document debug assertions in unsafe methods better
|
||||
|
||||
- 0.4.4
|
||||
|
||||
- Add method ``ArrayVec::truncate()`` by @niklasf
|
||||
|
||||
- 0.4.3
|
||||
|
||||
- Improve performance for ``ArrayVec::extend`` with a lower level
|
||||
implementation (#74)
|
||||
- Small cleanup in dependencies (use no std for crates where we don't need more)
|
||||
|
||||
- 0.4.2
|
||||
|
||||
- Add constructor method ``new`` to ``CapacityError``.
|
||||
|
||||
- 0.4.1
|
||||
|
||||
- Add ``Default`` impl to ``ArrayString`` by @tbu-
|
||||
|
||||
- 0.4.0
|
||||
|
||||
- Reformed signatures and error handling by @bluss and @tbu-:
|
||||
|
||||
- ``ArrayVec``'s ``push, insert, remove, swap_remove`` now match ``Vec``'s
|
||||
corresponding signature and panic on capacity errors where applicable.
|
||||
- Add fallible methods ``try_push, insert`` and checked methods
|
||||
``pop_at, swap_pop``.
|
||||
- Similar changes to ``ArrayString``'s push methods.
|
||||
|
||||
- Use a local version of the ``RangeArgument`` trait
|
||||
- Add array sizes 50, 150, 200 by @daboross
|
||||
- Support serde 1.0 by @daboross
|
||||
- New method ``.push_unchecked()`` by @niklasf
|
||||
- ``ArrayString`` implements ``PartialOrd, Ord`` by @tbu-
|
||||
- Require Rust 1.14
|
||||
- crate feature ``use_generic_array`` was dropped.
|
||||
|
||||
- 0.3.23
|
||||
|
||||
- Implement ``PartialOrd, Ord`` as well as ``PartialOrd<str>`` for
|
||||
``ArrayString``.
|
||||
|
||||
- 0.3.22
|
||||
|
||||
- Implement ``Array`` for the 65536 size
|
||||
|
||||
- 0.3.21
|
||||
|
||||
- Use ``encode_utf8`` from crate odds
|
||||
- Add constructor ``ArrayString::from_byte_string``
|
||||
|
||||
- 0.3.20
|
||||
|
||||
- Simplify and speed up ``ArrayString``’s ``.push(char)``.
|
||||
|
||||
- 0.3.19
|
||||
|
||||
- Add new crate feature ``use_generic_array`` which allows using their
|
||||
``GenericArray`` just like a regular fixed size array for the storage
|
||||
of an ``ArrayVec``.
|
||||
|
||||
- 0.3.18
|
||||
|
||||
- Fix bounds check in ``ArrayVec::insert``!
|
||||
It would be buggy if ``self.len() < index < self.capacity()``. Take note of
|
||||
the push out behavior specified in the docs.
|
||||
|
||||
- 0.3.17
|
||||
|
||||
- Added crate feature ``use_union`` which forwards to the nodrop crate feature
|
||||
- Added methods ``.is_full()`` to ``ArrayVec`` and ``ArrayString``.
|
||||
|
||||
- 0.3.16
|
||||
|
||||
- Added method ``.retain()`` to ``ArrayVec``.
|
||||
- Added methods ``.as_slice(), .as_mut_slice()`` to ``ArrayVec`` and ``.as_str()``
|
||||
to ``ArrayString``.
|
||||
|
||||
- 0.3.15
|
||||
|
||||
- Add feature std, which you can opt out of to use ``no_std`` (requires Rust 1.6
|
||||
to opt out).
|
||||
- Implement ``Clone::clone_from`` for ArrayVec and ArrayString
|
||||
|
||||
- 0.3.14
|
||||
|
||||
- Add ``ArrayString::from(&str)``
|
||||
|
||||
- 0.3.13
|
||||
|
||||
- Added ``DerefMut`` impl for ``ArrayString``.
|
||||
- Added method ``.simplify()`` to drop the element for ``CapacityError``.
|
||||
- Added method ``.dispose()`` to ``ArrayVec``
|
||||
|
||||
- 0.3.12
|
||||
|
||||
- Added ArrayString, a fixed capacity analogy of String
|
||||
|
||||
- 0.3.11
|
||||
|
||||
- Added trait impls Default, PartialOrd, Ord, Write for ArrayVec
|
||||
|
||||
- 0.3.10
|
||||
|
||||
- Go back to using external NoDrop, fixing a panic safety bug (issue #3)
|
||||
|
||||
- 0.3.8
|
||||
|
||||
- Inline the non-dropping logic to remove one drop flag in the
|
||||
ArrayVec representation.
|
||||
|
||||
- 0.3.7
|
||||
|
||||
- Added method .into_inner()
|
||||
- Added unsafe method .set_len()
|
||||
|
||||
|
||||
License
|
||||
=======
|
||||
|
||||
Dual-licensed to be compatible with the Rust project.
|
||||
|
||||
Licensed under the Apache License, Version 2.0
|
||||
http://www.apache.org/licenses/LICENSE-2.0 or the MIT license
|
||||
http://opensource.org/licenses/MIT, at your
|
||||
option. This file may not be copied, modified, or distributed
|
||||
except according to those terms.
|
||||
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
|
||||
extern crate arrayvec;
|
||||
#[macro_use] extern crate bencher;
|
||||
|
||||
use arrayvec::ArrayString;
|
||||
|
||||
use bencher::Bencher;
|
||||
|
||||
fn try_push_c(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
while v.try_push('c').is_ok() {
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn try_push_alpha(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
while v.try_push('α').is_ok() {
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
// Yes, pushing a string char-by-char is slow. Use .push_str.
|
||||
fn try_push_string(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
let input = "abcαβγ“”";
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
for ch in input.chars().cycle() {
|
||||
if !v.try_push(ch).is_ok() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn push_c(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
while !v.is_full() {
|
||||
v.push('c');
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn push_alpha(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
while !v.is_full() {
|
||||
v.push('α');
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn push_string(b: &mut Bencher) {
|
||||
let mut v = ArrayString::<[u8; 512]>::new();
|
||||
let input = "abcαβγ“”";
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
for ch in input.chars().cycle() {
|
||||
if !v.is_full() {
|
||||
v.push(ch);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
v.len()
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
benchmark_group!(benches, try_push_c, try_push_alpha, try_push_string, push_c,
|
||||
push_alpha, push_string);
|
||||
benchmark_main!(benches);
|
|
@ -0,0 +1,78 @@
|
|||
|
||||
extern crate arrayvec;
|
||||
#[macro_use] extern crate bencher;
|
||||
|
||||
use std::io::Write;
|
||||
|
||||
use arrayvec::ArrayVec;
|
||||
|
||||
use bencher::Bencher;
|
||||
use bencher::black_box;
|
||||
|
||||
fn extend_with_constant(b: &mut Bencher) {
|
||||
let mut v = ArrayVec::<[u8; 512]>::new();
|
||||
let cap = v.capacity();
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
let constant = black_box(1);
|
||||
v.extend((0..cap).map(move |_| constant));
|
||||
v[511]
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn extend_with_range(b: &mut Bencher) {
|
||||
let mut v = ArrayVec::<[u8; 512]>::new();
|
||||
let cap = v.capacity();
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
let range = 0..cap;
|
||||
v.extend(range.map(|x| black_box(x as _)));
|
||||
v[511]
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn extend_with_slice(b: &mut Bencher) {
|
||||
let mut v = ArrayVec::<[u8; 512]>::new();
|
||||
let data = [1; 512];
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
let iter = data.iter().map(|&x| x);
|
||||
v.extend(iter);
|
||||
v[511]
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn extend_with_write(b: &mut Bencher) {
|
||||
let mut v = ArrayVec::<[u8; 512]>::new();
|
||||
let data = [1; 512];
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
v.write(&data[..]).ok();
|
||||
v[511]
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
fn extend_from_slice(b: &mut Bencher) {
|
||||
let mut v = ArrayVec::<[u8; 512]>::new();
|
||||
let data = [1; 512];
|
||||
b.iter(|| {
|
||||
v.clear();
|
||||
v.try_extend_from_slice(&data).ok();
|
||||
v[511]
|
||||
});
|
||||
b.bytes = v.capacity() as u64;
|
||||
}
|
||||
|
||||
benchmark_group!(benches,
|
||||
extend_with_constant,
|
||||
extend_with_range,
|
||||
extend_with_slice,
|
||||
extend_with_write,
|
||||
extend_from_slice
|
||||
);
|
||||
|
||||
benchmark_main!(benches);
|
|
@ -0,0 +1,25 @@
|
|||
|
||||
/* Rustdoc theme tweaks: tinted backgrounds for code examples and item
   signatures. */
.docblock pre.rust { background: #eeeeff; }
pre.trait, pre.fn, pre.struct, pre.enum, pre.typedef { background: #fcfefc; }

/* Small “example” label shown in the top-right corner of doc examples */
.docblock pre.rust::before {
    content: "example";
    float: right;
    font-style: italic;
    font-size: 0.8em;
    margin-top: -10px;
    margin-right: -5px;
}

/* Force `where` clauses onto their own line in trait listings */
pre.trait .where::before {
    content: '\a ';
}

/* Inline code in doc text: bold text, no background box */
.docblock code {
    background-color: inherit;
    font-weight: bold;
    padding: 0 0.1em;
}
|
|
@ -0,0 +1,144 @@
|
|||
|
||||
/// Trait for fixed size arrays.
|
||||
///
|
||||
/// This trait is implemented for some specific array sizes, see
|
||||
/// the implementor list below. At the current state of Rust we can't
|
||||
/// make this fully general for every array size.
|
||||
///
|
||||
/// The following crate features add more array sizes (and they are not
|
||||
/// enabled by default due to their impact on compliation speed).
|
||||
///
|
||||
/// - `array-sizes-33-128`: All sizes 33 to 128 are implemented
|
||||
/// (a few in this range are included by default).
|
||||
/// - `array-sizes-129-255`: All sizes 129 to 255 are implemented
|
||||
/// (a few in this range are included by default).
|
||||
///
|
||||
/// ## Safety
|
||||
///
|
||||
/// This trait can *only* be implemented by fixed-size arrays or types with
|
||||
/// *exactly* the representation of a fixed size array (of the right element
|
||||
/// type and capacity).
|
||||
///
|
||||
/// Normally this trait is an implementation detail of arrayvec and doesn’t
|
||||
/// need implementing.
|
||||
pub unsafe trait Array {
|
||||
/// The array’s element type
|
||||
type Item;
|
||||
/// The smallest type that can index and tell the length of the array.
|
||||
#[doc(hidden)]
|
||||
type Index: Index;
|
||||
/// The array's element capacity
|
||||
const CAPACITY: usize;
|
||||
fn as_slice(&self) -> &[Self::Item];
|
||||
fn as_mut_slice(&mut self) -> &mut [Self::Item];
|
||||
}
|
||||
|
||||
/// Conversion between `usize` and the compact index types used to store
/// the length of an `ArrayVec`/`ArrayString`.
pub trait Index : PartialEq + Copy {
    fn to_usize(self) -> usize;
    fn from(_: usize) -> Self;
}

// A zero-length array never indexes anything, so `()` suffices.
impl Index for () {
    #[inline(always)]
    fn to_usize(self) -> usize {
        0
    }
    #[inline(always)]
    fn from(_ix: usize) -> Self {
        ()
    }
}

// A one-element array only needs lengths 0 and 1.
impl Index for bool {
    #[inline(always)]
    fn to_usize(self) -> usize {
        self as usize
    }
    #[inline(always)]
    fn from(ix: usize) -> Self {
        ix != 0
    }
}

impl Index for u8 {
    #[inline(always)]
    fn to_usize(self) -> usize {
        self as usize
    }
    #[inline(always)]
    fn from(ix: usize) -> Self {
        ix as u8
    }
}

impl Index for u16 {
    #[inline(always)]
    fn to_usize(self) -> usize {
        self as usize
    }
    #[inline(always)]
    fn from(ix: usize) -> Self {
        ix as u16
    }
}

impl Index for u32 {
    #[inline(always)]
    fn to_usize(self) -> usize {
        self as usize
    }
    #[inline(always)]
    fn from(ix: usize) -> Self {
        ix as u32
    }
}

impl Index for usize {
    #[inline(always)]
    fn to_usize(self) -> usize {
        self
    }
    #[inline(always)]
    fn from(ix: usize) -> Self {
        ix
    }
}
|
||||
|
||||
/// Implement `Array` for a single fixed length `$len`, with `$index_type`
/// as the smallest integer type able to index it.
macro_rules! fix_array_impl {
    ($index_type:ty, $len:expr) => {
        unsafe impl<T> Array for [T; $len] {
            type Item = T;
            type Index = $index_type;
            const CAPACITY: usize = $len;
            #[doc(hidden)]
            fn as_slice(&self) -> &[Self::Item] { self }
            #[doc(hidden)]
            fn as_mut_slice(&mut self) -> &mut [Self::Item] { self }
        }
    };
}
|
||||
|
||||
/// Invoke `fix_array_impl!` once for every length in a trailing-comma list.
macro_rules! fix_array_impl_recursive {
    ($index_type:ty, ) => ();
    ($index_type:ty, $($len:expr,)*) => {
        $(fix_array_impl!($index_type, $len);)*
    };
}
|
||||
|
||||
|
||||
// Implement `Array` for the fixed lengths below. The first macro argument
// is the smallest index type able to address the listed lengths.

// Length 0 needs no index information at all; length 1 needs one bit.
fix_array_impl_recursive!((), 0,);
fix_array_impl_recursive!(bool, 1,);
// Lengths 2..=31 are always provided and indexed by `u8`.
fix_array_impl_recursive!(u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
                          28, 29, 30, 31, );

// Without the feature, only a handful of common sizes in 32..=128 exist.
#[cfg(not(feature="array-sizes-33-128"))]
fix_array_impl_recursive!(u8, 32, 40, 48, 50, 56, 64, 72, 96, 100, 128, );

// With the feature, every size 32..=128 is implemented.
#[cfg(feature="array-sizes-33-128")]
fix_array_impl_recursive!(u8,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
125, 126, 127, 128,
);

// Same pattern for 129..=255: a sparse default set, or the full range.
#[cfg(not(feature="array-sizes-129-255"))]
fix_array_impl_recursive!(u8, 160, 192, 200, 224,);

#[cfg(feature="array-sizes-129-255")]
fix_array_impl_recursive!(u8,
129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172,
173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204,
205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236,
237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
253, 254, 255,
);

// Larger sizes need a `u16` index.
fix_array_impl_recursive!(u16, 256, 384, 512, 768, 1024, 2048, 4096, 8192, 16384, 32768,);
// This array size doesn't exist on 16-bit
#[cfg(any(target_pointer_width="32", target_pointer_width="64"))]
fix_array_impl_recursive!(u32, 1 << 16,);
|
||||
|
|
@ -0,0 +1,567 @@
|
|||
use std::borrow::Borrow;
|
||||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::ptr;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::str;
|
||||
use std::str::FromStr;
|
||||
use std::str::Utf8Error;
|
||||
use std::slice;
|
||||
|
||||
use crate::array::Array;
|
||||
use crate::array::Index;
|
||||
use crate::CapacityError;
|
||||
use crate::char::encode_utf8;
|
||||
|
||||
#[cfg(feature="serde")]
|
||||
use serde::{Serialize, Deserialize, Serializer, Deserializer};
|
||||
|
||||
use super::MaybeUninit as MaybeUninitCopy;
|
||||
|
||||
/// A string with a fixed capacity.
///
/// The `ArrayString` is a string backed by a fixed size array. It keeps track
/// of its length.
///
/// The string is a contiguous value that you can store directly on the stack
/// if needed.
// `Copy` is derivable because the storage is a plain `Copy` byte array.
// NOTE(review): `Clone` is presumably implemented elsewhere in this file
// (`Copy` requires it) — confirm.
#[derive(Copy)]
pub struct ArrayString<A>
    where A: Array<Item=u8> + Copy
{
    // Possibly-uninitialized backing bytes; only the first `len` bytes are
    // initialized, and they are kept valid UTF-8 by the push methods.
    xs: MaybeUninitCopy<A>,
    // Current length in bytes, stored in the smallest index type for `A`.
    len: A::Index,
}
|
||||
|
||||
impl<A> Default for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
/// Return an empty `ArrayString`
|
||||
fn default() -> ArrayString<A> {
|
||||
ArrayString::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl<A> ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
/// Create a new empty `ArrayString`.
|
||||
///
|
||||
/// Capacity is inferred from the type parameter.
|
||||
///
|
||||
/// ```
|
||||
/// use arrayvec::ArrayString;
|
||||
///
|
||||
/// let mut string = ArrayString::<[_; 16]>::new();
|
||||
/// string.push_str("foo");
|
||||
/// assert_eq!(&string[..], "foo");
|
||||
/// assert_eq!(string.capacity(), 16);
|
||||
/// ```
|
||||
pub fn new() -> ArrayString<A> {
|
||||
unsafe {
|
||||
ArrayString {
|
||||
xs: MaybeUninitCopy::uninitialized(),
|
||||
len: Index::from(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the length of the string.
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize { self.len.to_usize() }
|
||||
|
||||
/// Create a new `ArrayString` from a `str`.
|
||||
///
|
||||
/// Capacity is inferred from the type parameter.
|
||||
///
|
||||
/// **Errors** if the backing array is not large enough to fit the string.
|
||||
///
|
||||
/// ```
|
||||
/// use arrayvec::ArrayString;
|
||||
///
|
||||
/// let mut string = ArrayString::<[_; 3]>::from("foo").unwrap();
|
||||
/// assert_eq!(&string[..], "foo");
|
||||
/// assert_eq!(string.len(), 3);
|
||||
/// assert_eq!(string.capacity(), 3);
|
||||
/// ```
|
||||
pub fn from(s: &str) -> Result<Self, CapacityError<&str>> {
|
||||
let mut arraystr = Self::new();
|
||||
arraystr.try_push_str(s)?;
|
||||
Ok(arraystr)
|
||||
}
|
||||
|
||||
/// Create a new `ArrayString` from a byte string literal.
|
||||
///
|
||||
/// **Errors** if the byte string literal is not valid UTF-8.
|
||||
///
|
||||
/// ```
|
||||
/// use arrayvec::ArrayString;
|
||||
///
|
||||
/// let string = ArrayString::from_byte_string(b"hello world").unwrap();
|
||||
/// ```
|
||||
pub fn from_byte_string(b: &A) -> Result<Self, Utf8Error> {
|
||||
let len = str::from_utf8(b.as_slice())?.len();
|
||||
debug_assert_eq!(len, A::CAPACITY);
|
||||
Ok(ArrayString {
|
||||
xs: MaybeUninitCopy::from(*b),
|
||||
len: Index::from(A::CAPACITY),
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the capacity of the `ArrayString`.
|
||||
///
|
||||
/// ```
|
||||
/// use arrayvec::ArrayString;
|
||||
///
|
||||
/// let string = ArrayString::<[_; 3]>::new();
|
||||
/// assert_eq!(string.capacity(), 3);
|
||||
/// ```
|
||||
#[inline(always)]
|
||||
pub fn capacity(&self) -> usize { A::CAPACITY }
|
||||
|
||||
/// Return if the `ArrayString` is completely filled.
|
||||
///
|
||||
/// ```
|
||||
/// use arrayvec::ArrayString;
|
||||
///
|
||||
/// let mut string = ArrayString::<[_; 1]>::new();
|
||||
/// assert!(!string.is_full());
|
||||
/// string.push_str("A");
|
||||
/// assert!(string.is_full());
|
||||
/// ```
|
||||
pub fn is_full(&self) -> bool { self.len() == self.capacity() }
|
||||
|
||||
    /// Adds the given char to the end of the string.
    ///
    /// ***Panics*** if the backing array is not large enough to fit the additional char.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut string = ArrayString::<[_; 2]>::new();
    ///
    /// string.push('a');
    /// string.push('b');
    ///
    /// assert_eq!(&string[..], "ab");
    /// ```
    pub fn push(&mut self, c: char) {
        // Thin wrapper: delegates to the fallible `try_push` and turns a
        // capacity error into a panic.
        self.try_push(c).unwrap();
    }
|
||||
|
||||
/// Adds the given char to the end of the string.
|
||||
///
|
||||
/// Returns `Ok` if the push succeeds.
|
||||
///
|
||||
/// **Errors** if the backing array is not large enough to fit the additional char.
|
||||
///
|
||||
/// ```
|
||||
/// use arrayvec::ArrayString;
|
||||
///
|
||||
/// let mut string = ArrayString::<[_; 2]>::new();
|
||||
///
|
||||
/// string.try_push('a').unwrap();
|
||||
/// string.try_push('b').unwrap();
|
||||
/// let overflow = string.try_push('c');
|
||||
///
|
||||
/// assert_eq!(&string[..], "ab");
|
||||
/// assert_eq!(overflow.unwrap_err().element(), 'c');
|
||||
/// ```
|
||||
pub fn try_push(&mut self, c: char) -> Result<(), CapacityError<char>> {
|
||||
let len = self.len();
|
||||
unsafe {
|
||||
let ptr = self.xs.ptr_mut().add(len);
|
||||
let remaining_cap = self.capacity() - len;
|
||||
match encode_utf8(c, ptr, remaining_cap) {
|
||||
Ok(n) => {
|
||||
self.set_len(len + n);
|
||||
Ok(())
|
||||
}
|
||||
Err(_) => Err(CapacityError::new(c)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
    /// Adds the given string slice to the end of the string.
    ///
    /// ***Panics*** if the backing array is not large enough to fit the string.
    ///
    /// ```
    /// use arrayvec::ArrayString;
    ///
    /// let mut string = ArrayString::<[_; 2]>::new();
    ///
    /// string.push_str("a");
    /// string.push_str("d");
    ///
    /// assert_eq!(&string[..], "ad");
    /// ```
    pub fn push_str(&mut self, s: &str) {
        // Thin wrapper: delegates to the fallible `try_push_str` and turns
        // a capacity error into a panic.
        self.try_push_str(s).unwrap()
    }
|
||||
|
||||
/// Adds the given string slice to the end of the string.
|
||||
///
|
||||
/// Returns `Ok` if the push succeeds.
|
||||
///
|
||||
/// **Errors** if the backing array is not large enough to fit the string.
|
||||
///
|
||||
/// ```
|
||||
/// use arrayvec::ArrayString;
|
||||
///
|
||||
/// let mut string = ArrayString::<[_; 2]>::new();
|
||||
///
|
||||
/// string.try_push_str("a").unwrap();
|
||||
/// let overflow1 = string.try_push_str("bc");
|
||||
/// string.try_push_str("d").unwrap();
|
||||
/// let overflow2 = string.try_push_str("ef");
|
||||
///
|
||||
/// assert_eq!(&string[..], "ad");
|
||||
/// assert_eq!(overflow1.unwrap_err().element(), "bc");
|
||||
/// assert_eq!(overflow2.unwrap_err().element(), "ef");
|
||||
/// ```
|
||||
pub fn try_push_str<'a>(&mut self, s: &'a str) -> Result<(), CapacityError<&'a str>> {
|
||||
if s.len() > self.capacity() - self.len() {
|
||||
return Err(CapacityError::new(s));
|
||||
}
|
||||
unsafe {
|
||||
let dst = self.xs.ptr_mut().offset(self.len() as isize);
|
||||
let src = s.as_ptr();
|
||||
ptr::copy_nonoverlapping(src, dst, s.len());
|
||||
let newl = self.len() + s.len();
|
||||
self.set_len(newl);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Removes the last character from the string and returns it.
|
||||
///
|
||||
/// Returns `None` if this `ArrayString` is empty.
|
||||
///
|
||||
/// ```
|
||||
/// use arrayvec::ArrayString;
|
||||
///
|
||||
/// let mut s = ArrayString::<[_; 3]>::from("foo").unwrap();
|
||||
///
|
||||
/// assert_eq!(s.pop(), Some('o'));
|
||||
/// assert_eq!(s.pop(), Some('o'));
|
||||
/// assert_eq!(s.pop(), Some('f'));
|
||||
///
|
||||
/// assert_eq!(s.pop(), None);
|
||||
/// ```
|
||||
pub fn pop(&mut self) -> Option<char> {
|
||||
let ch = match self.chars().rev().next() {
|
||||
Some(ch) => ch,
|
||||
None => return None,
|
||||
};
|
||||
let new_len = self.len() - ch.len_utf8();
|
||||
unsafe {
|
||||
self.set_len(new_len);
|
||||
}
|
||||
Some(ch)
|
||||
}
|
||||
|
||||
/// Shortens this `ArrayString` to the specified length.
|
||||
///
|
||||
/// If `new_len` is greater than the string’s current length, this has no
|
||||
/// effect.
|
||||
///
|
||||
/// ***Panics*** if `new_len` does not lie on a `char` boundary.
|
||||
///
|
||||
/// ```
|
||||
/// use arrayvec::ArrayString;
|
||||
///
|
||||
/// let mut string = ArrayString::<[_; 6]>::from("foobar").unwrap();
|
||||
/// string.truncate(3);
|
||||
/// assert_eq!(&string[..], "foo");
|
||||
/// string.truncate(4);
|
||||
/// assert_eq!(&string[..], "foo");
|
||||
/// ```
|
||||
pub fn truncate(&mut self, new_len: usize) {
|
||||
if new_len <= self.len() {
|
||||
assert!(self.is_char_boundary(new_len));
|
||||
unsafe {
|
||||
// In libstd truncate is called on the underlying vector,
|
||||
// which in turns drops each element.
|
||||
// As we know we don't have to worry about Drop,
|
||||
// we can just set the length (a la clear.)
|
||||
self.set_len(new_len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes a `char` from this `ArrayString` at a byte position and returns it.
|
||||
///
|
||||
/// This is an `O(n)` operation, as it requires copying every element in the
|
||||
/// array.
|
||||
///
|
||||
/// ***Panics*** if `idx` is larger than or equal to the `ArrayString`’s length,
|
||||
/// or if it does not lie on a `char` boundary.
|
||||
///
|
||||
/// ```
|
||||
/// use arrayvec::ArrayString;
|
||||
///
|
||||
/// let mut s = ArrayString::<[_; 3]>::from("foo").unwrap();
|
||||
///
|
||||
/// assert_eq!(s.remove(0), 'f');
|
||||
/// assert_eq!(s.remove(1), 'o');
|
||||
/// assert_eq!(s.remove(0), 'o');
|
||||
/// ```
|
||||
pub fn remove(&mut self, idx: usize) -> char {
|
||||
let ch = match self[idx..].chars().next() {
|
||||
Some(ch) => ch,
|
||||
None => panic!("cannot remove a char from the end of a string"),
|
||||
};
|
||||
|
||||
let next = idx + ch.len_utf8();
|
||||
let len = self.len();
|
||||
unsafe {
|
||||
ptr::copy(self.xs.ptr().offset(next as isize),
|
||||
self.xs.ptr_mut().offset(idx as isize),
|
||||
len - next);
|
||||
self.set_len(len - (next - idx));
|
||||
}
|
||||
ch
|
||||
}
|
||||
|
||||
/// Make the string empty.
pub fn clear(&mut self) {
    unsafe {
        // SAFETY: zero is always a valid length; the byte contents need
        // no Drop, so simply forgetting them is sound.
        self.set_len(0);
    }
}
|
||||
|
||||
/// Set the strings’s length.
///
/// This function is `unsafe` because it changes the notion of the
/// number of “valid” bytes in the string. Use with care.
///
/// This method uses *debug assertions* to check the validity of `length`
/// and may use other debug assertions.
///
/// # Safety
///
/// The caller must ensure `length <= capacity` and that the first
/// `length` bytes of the backing array are initialized, valid UTF-8.
pub unsafe fn set_len(&mut self, length: usize) {
    debug_assert!(length <= self.capacity());
    self.len = Index::from(length);
}
|
||||
|
||||
/// Return a string slice of the whole `ArrayString`.
pub fn as_str(&self) -> &str {
    // Relies on the `Deref<Target = str>` impl via deref coercion.
    self
}
|
||||
}
|
||||
|
||||
impl<A> Deref for ArrayString<A>
    where A: Array<Item=u8> + Copy
{
    type Target = str;
    #[inline]
    fn deref(&self) -> &str {
        unsafe {
            // SAFETY: the first `len` bytes are initialized, and every
            // mutating method keeps that prefix valid UTF-8.
            let sl = slice::from_raw_parts(self.xs.ptr(), self.len.to_usize());
            str::from_utf8_unchecked(sl)
        }
    }
}
|
||||
|
||||
impl<A> DerefMut for ArrayString<A>
    where A: Array<Item=u8> + Copy
{
    #[inline]
    fn deref_mut(&mut self) -> &mut str {
        unsafe {
            // SAFETY: same invariant as `deref`; `&mut self` guarantees
            // exclusive access, so handing out `&mut str` is sound.
            let sl = slice::from_raw_parts_mut(self.xs.ptr_mut(), self.len.to_usize());
            str::from_utf8_unchecked_mut(sl)
        }
    }
}
|
||||
|
||||
impl<A> PartialEq for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn eq(&self, rhs: &Self) -> bool {
|
||||
**self == **rhs
|
||||
}
|
||||
}
|
||||
|
||||
impl<A> PartialEq<str> for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn eq(&self, rhs: &str) -> bool {
|
||||
&**self == rhs
|
||||
}
|
||||
}
|
||||
|
||||
impl<A> PartialEq<ArrayString<A>> for str
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn eq(&self, rhs: &ArrayString<A>) -> bool {
|
||||
self == &**rhs
|
||||
}
|
||||
}
|
||||
|
||||
impl<A> Eq for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{ }
|
||||
|
||||
impl<A> Hash for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn hash<H: Hasher>(&self, h: &mut H) {
|
||||
(**self).hash(h)
|
||||
}
|
||||
}
|
||||
|
||||
impl<A> Borrow<str> for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn borrow(&self) -> &str { self }
|
||||
}
|
||||
|
||||
impl<A> AsRef<str> for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn as_ref(&self) -> &str { self }
|
||||
}
|
||||
|
||||
impl<A> fmt::Debug for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { (**self).fmt(f) }
|
||||
}
|
||||
|
||||
impl<A> fmt::Display for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { (**self).fmt(f) }
|
||||
}
|
||||
|
||||
/// `Write` appends written data to the end of the string.
|
||||
impl<A> fmt::Write for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn write_char(&mut self, c: char) -> fmt::Result {
|
||||
self.try_push(c).map_err(|_| fmt::Error)
|
||||
}
|
||||
|
||||
fn write_str(&mut self, s: &str) -> fmt::Result {
|
||||
self.try_push_str(s).map_err(|_| fmt::Error)
|
||||
}
|
||||
}
|
||||
|
||||
impl<A> Clone for ArrayString<A>
    where A: Array<Item=u8> + Copy
{
    fn clone(&self) -> ArrayString<A> {
        // The backing array is `Copy`, so a bitwise copy is a valid clone.
        *self
    }
    fn clone_from(&mut self, rhs: &Self) {
        // guaranteed to fit due to types matching.
        self.clear();
        self.try_push_str(rhs).ok();
    }
}
|
||||
|
||||
impl<A> PartialOrd for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn partial_cmp(&self, rhs: &Self) -> Option<cmp::Ordering> {
|
||||
(**self).partial_cmp(&**rhs)
|
||||
}
|
||||
fn lt(&self, rhs: &Self) -> bool { **self < **rhs }
|
||||
fn le(&self, rhs: &Self) -> bool { **self <= **rhs }
|
||||
fn gt(&self, rhs: &Self) -> bool { **self > **rhs }
|
||||
fn ge(&self, rhs: &Self) -> bool { **self >= **rhs }
|
||||
}
|
||||
|
||||
impl<A> PartialOrd<str> for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn partial_cmp(&self, rhs: &str) -> Option<cmp::Ordering> {
|
||||
(**self).partial_cmp(rhs)
|
||||
}
|
||||
fn lt(&self, rhs: &str) -> bool { &**self < rhs }
|
||||
fn le(&self, rhs: &str) -> bool { &**self <= rhs }
|
||||
fn gt(&self, rhs: &str) -> bool { &**self > rhs }
|
||||
fn ge(&self, rhs: &str) -> bool { &**self >= rhs }
|
||||
}
|
||||
|
||||
impl<A> PartialOrd<ArrayString<A>> for str
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn partial_cmp(&self, rhs: &ArrayString<A>) -> Option<cmp::Ordering> {
|
||||
self.partial_cmp(&**rhs)
|
||||
}
|
||||
fn lt(&self, rhs: &ArrayString<A>) -> bool { self < &**rhs }
|
||||
fn le(&self, rhs: &ArrayString<A>) -> bool { self <= &**rhs }
|
||||
fn gt(&self, rhs: &ArrayString<A>) -> bool { self > &**rhs }
|
||||
fn ge(&self, rhs: &ArrayString<A>) -> bool { self >= &**rhs }
|
||||
}
|
||||
|
||||
impl<A> Ord for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
fn cmp(&self, rhs: &Self) -> cmp::Ordering {
|
||||
(**self).cmp(&**rhs)
|
||||
}
|
||||
}
|
||||
|
||||
impl<A> FromStr for ArrayString<A>
|
||||
where A: Array<Item=u8> + Copy
|
||||
{
|
||||
type Err = CapacityError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Self::from(s).map_err(CapacityError::simplify)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature="serde")]
/// Requires crate feature `"serde"`
impl<A> Serialize for ArrayString<A>
    where A: Array<Item=u8> + Copy
{
    // Serializes transparently as a plain string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where S: Serializer
    {
        serializer.serialize_str(&*self)
    }
}
|
||||
|
||||
#[cfg(feature="serde")]
/// Requires crate feature `"serde"`
impl<'de, A> Deserialize<'de> for ArrayString<A>
    where A: Array<Item=u8> + Copy
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where D: Deserializer<'de>
    {
        use serde::de::{self, Visitor};
        use std::marker::PhantomData;

        // Visitor that copies the input straight into an ArrayString,
        // rejecting strings longer than the fixed capacity.
        struct ArrayStringVisitor<A: Array<Item=u8>>(PhantomData<A>);

        impl<'de, A: Copy + Array<Item=u8>> Visitor<'de> for ArrayStringVisitor<A> {
            type Value = ArrayString<A>;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                write!(formatter, "a string no more than {} bytes long", A::CAPACITY)
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
                where E: de::Error,
            {
                // A capacity overflow maps to serde's invalid-length error.
                ArrayString::from(v).map_err(|_| E::invalid_length(v.len(), &self))
            }

            fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
                where E: de::Error,
            {
                // Bytes must be valid UTF-8 before the length check applies.
                let s = str::from_utf8(v).map_err(|_| E::invalid_value(de::Unexpected::Bytes(v), &self))?;

                ArrayString::from(s).map_err(|_| E::invalid_length(s.len(), &self))
            }
        }

        deserializer.deserialize_str(ArrayStringVisitor::<A>(PhantomData))
    }
}
|
|
@ -0,0 +1,98 @@
|
|||
// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
//
|
||||
// Original authors: alexchrichton, bluss
|
||||
|
||||
use std::ptr;
|
||||
|
||||
// UTF-8 ranges and tags for encoding characters
const TAG_CONT: u8 = 0b1000_0000;
const TAG_TWO_B: u8 = 0b1100_0000;
const TAG_THREE_B: u8 = 0b1110_0000;
const TAG_FOUR_B: u8 = 0b1111_0000;
const MAX_ONE_B: u32 = 0x80;
const MAX_TWO_B: u32 = 0x800;
const MAX_THREE_B: u32 = 0x10000;

/// Error returned when the destination buffer is too short for the char.
pub struct EncodeUtf8Error;

/// Store `byte` at `ptr + index`.
///
/// Safety: `ptr + index` must be valid for a one-byte write.
#[inline]
unsafe fn write(ptr: *mut u8, index: usize, byte: u8) {
    ptr::write(ptr.add(index), byte)
}

/// Encode a char into buf using UTF-8.
///
/// On success, return the byte length of the encoding (1, 2, 3 or 4).<br>
/// On error, return `EncodeUtf8Error` if the buffer was too short for the char.
///
/// Safety: `ptr` must be writable for `len` bytes.
#[inline]
pub unsafe fn encode_utf8(ch: char, ptr: *mut u8, len: usize) -> Result<usize, EncodeUtf8Error>
{
    let code = ch as u32;
    // Pick the encoding width from the scalar value, then refuse to write
    // anything at all if the buffer cannot hold the complete sequence.
    if code < MAX_ONE_B {
        if len < 1 { return Err(EncodeUtf8Error); }
        write(ptr, 0, code as u8);
        Ok(1)
    } else if code < MAX_TWO_B {
        if len < 2 { return Err(EncodeUtf8Error); }
        write(ptr, 0, (code >> 6 & 0x1F) as u8 | TAG_TWO_B);
        write(ptr, 1, (code & 0x3F) as u8 | TAG_CONT);
        Ok(2)
    } else if code < MAX_THREE_B {
        if len < 3 { return Err(EncodeUtf8Error); }
        write(ptr, 0, (code >> 12 & 0x0F) as u8 | TAG_THREE_B);
        write(ptr, 1, (code >> 6 & 0x3F) as u8 | TAG_CONT);
        write(ptr, 2, (code & 0x3F) as u8 | TAG_CONT);
        Ok(3)
    } else {
        if len < 4 { return Err(EncodeUtf8Error); }
        write(ptr, 0, (code >> 18 & 0x07) as u8 | TAG_FOUR_B);
        write(ptr, 1, (code >> 12 & 0x3F) as u8 | TAG_CONT);
        write(ptr, 2, (code >> 6 & 0x3F) as u8 | TAG_CONT);
        write(ptr, 3, (code & 0x3F) as u8 | TAG_CONT);
        Ok(4)
    }
}
|
||||
|
||||
|
||||
#[test]
fn test_encode_utf8() {
    // Test that all codepoints are encoded correctly
    let mut data = [0u8; 16];
    for codepoint in 0..=(std::char::MAX as u32) {
        // `from_u32` filters out the surrogate range, which `char` cannot hold.
        if let Some(ch) = std::char::from_u32(codepoint) {
            for elt in &mut data { *elt = 0; }
            let ptr = data.as_mut_ptr();
            let len = data.len();
            unsafe {
                let res = encode_utf8(ch, ptr, len).ok().unwrap();
                assert_eq!(res, ch.len_utf8());
            }
            // The zeroed tail keeps the whole buffer valid UTF-8, so we can
            // decode it and check the first char round-trips.
            let string = std::str::from_utf8(&data).unwrap();
            assert_eq!(string.chars().next(), Some(ch));
        }
    }
}
|
||||
|
||||
#[test]
fn test_encode_utf8_oob() {
    // test that we report oob if the buffer is too short
    let mut data = [0u8; 16];
    // One char per UTF-8 width: 1, 2, 3 and 4 bytes. The 3-byte entry was
    // a mojibake replacement character that did not compile; '€' (U+20AC)
    // restores a valid 3-byte case.
    let chars = ['a', 'α', '€', '𐍈'];
    for (len, &ch) in (1..=4).zip(&chars) {
        assert_eq!(len, ch.len_utf8(), "Len of ch={}", ch);
        let ptr = data.as_mut_ptr();
        unsafe {
            // Plain Result queries avoid depending on the external
            // `matches` crate for this check.
            assert!(encode_utf8(ch, ptr, len - 1).is_err());
            assert!(encode_utf8(ch, ptr, len).is_ok());
        }
    }
}
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
use std::fmt;
|
||||
#[cfg(feature="std")]
|
||||
use std::any::Any;
|
||||
#[cfg(feature="std")]
|
||||
use std::error::Error;
|
||||
|
||||
/// Error value indicating insufficient capacity
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)]
pub struct CapacityError<T = ()> {
    // The value that could not be stored.
    element: T,
}

impl<T> CapacityError<T> {
    /// Create a new `CapacityError` from `element`.
    pub fn new(element: T) -> CapacityError<T> {
        CapacityError { element }
    }

    /// Extract the overflowing element
    pub fn element(self) -> T {
        self.element
    }

    /// Convert into a `CapacityError` that does not carry an element.
    pub fn simplify(self) -> CapacityError {
        CapacityError::new(())
    }
}
|
||||
|
||||
const CAPERROR: &'static str = "insufficient capacity";
|
||||
|
||||
#[cfg(feature="std")]
|
||||
/// Requires `features="std"`.
|
||||
impl<T: Any> Error for CapacityError<T> {
|
||||
fn description(&self) -> &str {
|
||||
CAPERROR
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> fmt::Display for CapacityError<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", CAPERROR)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> fmt::Debug for CapacityError<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}: {}", "CapacityError", CAPERROR)
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,44 @@
|
|||
|
||||
|
||||
use crate::array::Array;
|
||||
use std::mem::MaybeUninit as StdMaybeUninit;
|
||||
|
||||
#[derive(Copy)]
pub struct MaybeUninit<T> {
    // Thin wrapper over the std type so the rest of the crate can keep its
    // historical `uninitialized`/`ptr`/`ptr_mut` API.
    inner: StdMaybeUninit<T>,
}

impl<T> Clone for MaybeUninit<T>
    where T: Copy
{
    fn clone(&self) -> Self { *self }
}

impl<T> MaybeUninit<T> {
    /// Create a new MaybeUninit with uninitialized interior
    ///
    /// # Safety
    ///
    /// The interior must be initialized before a `T` is read out of it.
    pub unsafe fn uninitialized() -> Self {
        MaybeUninit { inner: StdMaybeUninit::uninit() }
    }

    /// Create a new MaybeUninit from the value `v`.
    pub fn from(v: T) -> Self {
        MaybeUninit { inner: StdMaybeUninit::new(v) }
    }

    // Raw pointer casts written so that we don't reference or access the
    // uninitialized interior value

    /// Return a raw pointer to the start of the interior array
    pub fn ptr(&self) -> *const T::Item
        where T: Array
    {
        self.inner.as_ptr() as *const T::Item
    }

    /// Return a mut raw pointer to the start of the interior array
    pub fn ptr_mut(&mut self) -> *mut T::Item
        where T: Array
    {
        self.inner.as_mut_ptr() as *mut T::Item
    }
}
|
|
@ -0,0 +1,79 @@
|
|||
#![cfg(feature = "serde")]
|
||||
extern crate arrayvec;
|
||||
extern crate serde_test;
|
||||
|
||||
mod array_vec {
    use arrayvec::ArrayVec;

    use serde_test::{Token, assert_tokens, assert_de_tokens_error};

    // A zero-capacity vector must round-trip as an empty sequence.
    #[test]
    fn test_ser_de_empty() {
        let vec = ArrayVec::<[u32; 0]>::new();

        assert_tokens(&vec, &[
            Token::Seq { len: Some(0) },
            Token::SeqEnd,
        ]);
    }


    // A populated vector serializes as a plain sequence of its elements.
    #[test]
    fn test_ser_de() {
        let mut vec = ArrayVec::<[u32; 3]>::new();
        vec.push(20);
        vec.push(55);
        vec.push(123);

        assert_tokens(&vec, &[
            Token::Seq { len: Some(3) },
            Token::U32(20),
            Token::U32(55),
            Token::U32(123),
            Token::SeqEnd,
        ]);
    }

    // Deserializing more elements than the capacity must fail with a
    // length error rather than silently truncating.
    #[test]
    fn test_de_too_large() {
        assert_de_tokens_error::<ArrayVec<[u32; 2]>>(&[
            Token::Seq { len: Some(3) },
            Token::U32(13),
            Token::U32(42),
            Token::U32(68),
        ], "invalid length 3, expected an array with no more than 2 items");
    }
}
|
||||
|
||||
mod array_string {
    use arrayvec::ArrayString;

    use serde_test::{Token, assert_tokens, assert_de_tokens_error};

    // A zero-capacity string must round-trip as an empty str token.
    #[test]
    fn test_ser_de_empty() {
        let string = ArrayString::<[u8; 0]>::new();

        assert_tokens(&string, &[
            Token::Str(""),
        ]);
    }


    // A full-to-capacity string serializes as a plain str token.
    #[test]
    fn test_ser_de() {
        let string = ArrayString::<[u8; 9]>::from("1234 abcd")
            .expect("expected exact specified capacity to be enough");

        assert_tokens(&string, &[
            Token::Str("1234 abcd"),
        ]);
    }

    // Deserializing a string longer than the capacity must fail with a
    // length error rather than silently truncating.
    #[test]
    fn test_de_too_large() {
        assert_de_tokens_error::<ArrayString<[u8; 2]>>(&[
            Token::Str("afd")
        ], "invalid length 3, expected a string no more than 2 bytes long");
    }
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue