vendor all the shit.

ecs
q3k 2020-01-19 00:27:25 +01:00
parent 5565a77ac4
commit ac8a8b0f5a
5627 changed files with 798741 additions and 0 deletions


@ -0,0 +1 @@
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"a2f9a1263aa35a92af4ffc1935b264f062738bc25761aa62b3d582031d6bf5f0","DESIGN.md":"44d4516ef38d60e9638f756baf40bcd9eff1b8e8ce7538a1d8549e02d6605d48","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"626d74e4bdac78d2446c75c722a7e46d0eaa4e506a1068ff693b5abc338a384f","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"46c57a83a75a8f25fdf19a15deae10748d12b8af9445ae74700a546a92024608","src/automaton.rs":"85e79ceb964f824fcceca026abd255980840116704834d70a1b9c44833df299f","src/buffer.rs":"c40992e7d1ba0bac6d1c268d41069aad81f2226686c64192ed888a60f66db8cd","src/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/classes.rs":"590f2e257bf7c630bea3a28d4a1f75c78db7a0802f5921aced017a056146b4e6","src/dfa.rs":"2fb1077edfefd2b7f7e9c0d9df55df1441d4571500a2c45aa5b41960a36441e4","src/error.rs":"36dbf2cefbfaa8a69186551320dbff023d3e82780a6c925e87c3e3997b967e66","src/lib.rs":"028ab998e8f0d1a98650b139bcca83681cbb52545060b9253b76d7e19117b53d","src/nfa.rs":"6bc3479ad37c576bba4bbdc9e3d0c6e69a4b7f0d9a88fcbbf727bf4a9b288494","src/packed/api.rs":"aa89627c7114c057c98ad1c7ab9ce18c6ed55267a6bcf7bc8efb917b6cfe5532","src/packed/mod.rs":"29c76ad3cbb1f831140cefac7a27fb504ac4af4f454975a571965b48aad417eb","src/packed/pattern.rs":"b88c57af057997da0a5a06f4c5604a7e598c20acfc11c15cd8977727f6e1cf9c","src/packed/rabinkarp.rs":"b3242a8631ea5607163dcbb641e4ac9c6da26774378da1e51651b0ab5656b390","src/packed/teddy/README.md":"5819f40d221af93288e705eadef5393a41d7a0900881b4d676e01fd65d5adf15","src/packed/teddy/compile.rs":"21b18cbee9bc33918b85b1dc51a0faed57acb426f61e6b72aeaf69faa7595701","src/packed/teddy/mod.rs":"f63db3419b1d378929bf0bc1f0e3b909ff3c38b9f2b6e86ba4546b8f39907cd3","src/packed/teddy/runtime.rs":"0a1250ea73159b3be6e0fa9a3f55ecedbb2cb90cb798d1709e9f5ee48f8855d5","src/packed/tests.rs":"0b52ab9eef73a1a4f141f475a9fa98e54d447104aa69acba3a7f8248ce7164b2","src/packed/vector.rs":"ab3c0535fca5f09198d58cbfae44c292aeb3ce44bc92bca36d30dc72963639fc","src/prefilter.rs":"3dbe93d85c6fb985a9aea0b5eab003fe81a228e02adba00c8f63a35c3fd246b8","src/state_id.rs":"ebecd7046760e6bd72303f288be93342b446e7fe95f20b5ce23653d802c48b09","src/tests.rs":"9201cc0662bc9a1e8fa15c59e33a18a55ec6b3bd6bbea294d9cace0053bb8d24"},"package":"58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"}


@ -0,0 +1,47 @@
"""
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # "MIT"
"unencumbered", # "Unlicense"
])
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_library",
"rust_binary",
"rust_test",
)
rust_library(
name = "aho_corasick",
crate_root = "src/lib.rs",
crate_type = "lib",
edition = "2015",
srcs = glob(["**/*.rs"]),
deps = [
"//third_party/cargo/vendor/memchr-2.3.0:memchr",
],
rustc_flags = [
"--cap-lints=allow",
],
version = "0.7.6",
crate_features = [
"default",
"memchr",
"std",
],
)


@ -0,0 +1,3 @@
This project is dual-licensed under the Unlicense and MIT licenses.
You may use this code under the terms of either license.


@ -0,0 +1,47 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "aho-corasick"
version = "0.7.6"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
exclude = ["/aho-corasick-debug", "/ci/*", "/.travis.yml", "/appveyor.yml"]
autotests = false
description = "Fast multiple substring searching."
homepage = "https://github.com/BurntSushi/aho-corasick"
readme = "README.md"
keywords = ["string", "search", "text", "aho", "multi"]
categories = ["text-processing"]
license = "Unlicense/MIT"
repository = "https://github.com/BurntSushi/aho-corasick"
[profile.bench]
debug = true
[profile.release]
debug = true
[lib]
name = "aho_corasick"
[dependencies.memchr]
version = "2.2.0"
default-features = false
[dev-dependencies.doc-comment]
version = "0.3.1"
[features]
default = ["std"]
std = ["memchr/use_std"]
[badges.appveyor]
repository = "BurntSushi/aho-corasick"
[badges.travis-ci]
repository = "BurntSushi/aho-corasick"


@ -0,0 +1,483 @@
This document describes the internal design of this crate, which is an object
lesson in what happens when you take a fairly simple old algorithm like
Aho-Corasick and make it fast and production ready.
The target audience of this crate is Rust programmers who have some
familiarity with string searching; however, one does not need to know the
Aho-Corasick algorithm in order to read this (it is explained below). One
should, however, know what a trie is. (If you don't, go read its Wikipedia
article.)
The centerpiece of this crate is an implementation of Aho-Corasick. On its
own, Aho-Corasick isn't that complicated. The complex pieces come from the
different variants of Aho-Corasick implemented in this crate. Specifically,
they are:
* Aho-Corasick as an NFA, using dense transitions near root with sparse
transitions elsewhere.
* Aho-Corasick as a DFA. (An NFA is slower to search, but cheaper to construct
and uses less memory.)
* A DFA with pre-multiplied state identifiers. This saves a multiplication
instruction in the core search loop.
* A DFA with equivalence classes of bytes as the alphabet, instead of the
traditional 256-byte alphabet. This shrinks the size of the DFA in memory,
but adds an extra lookup in the core search loop to map the input byte to
an equivalent class.
* The option to choose how state identifiers are represented, via one of
u8, u16, u32, u64 or usize. This permits creating compact automatons when
matching a small number of patterns.
* Supporting "standard" match semantics, along with its overlapping variant,
in addition to leftmost-first and leftmost-longest semantics. The "standard"
semantics are typically what you see in a textbook description of
Aho-Corasick. However, Aho-Corasick is also useful as an optimization in
regex engines, which often use leftmost-first or leftmost-longest semantics.
Thus, it is useful to implement those semantics here. The "standard" and
"leftmost" search algorithms are subtly different, and also require slightly
different construction algorithms.
* Support for ASCII case insensitive matching.
* Support for accelerating searches when the patterns all start with a small
number of fixed bytes. Or alternatively, when the patterns all contain a
small number of rare bytes. (Searching for these bytes uses SIMD vectorized
code courtesy of `memchr`.)
* Transparent support for alternative SIMD vectorized search routines for
smaller numbers of literals, such as the Teddy algorithm. We call these
"packed" search routines because they use SIMD. They can often be an order of
magnitude faster than just Aho-Corasick, but don't scale as well.
* Support for searching streams. This can reuse most of the underlying code,
but does require careful buffering support.
* Support for anchored searches, which permit efficient `is_prefix` checks for
a large number of patterns.
When you combine all of this together along with trying to make everything as
fast as possible, what you end up with is entirely too much code with too much
`unsafe`. Alas, I was not smart enough to figure out how to reduce it. Instead,
we will explain it.
# Basics
The fundamental problem this crate is trying to solve is to determine the
occurrences of possibly many patterns in a haystack. The naive way to solve
this is to look for a match for each pattern at each position in the haystack:
for i in 0..haystack.len():
for p in patterns.iter():
if haystack[i..].starts_with(p.bytes()):
return Match(p.id(), i, i + p.bytes().len())
Those four lines are effectively all this crate does. The problem with those
four lines is that they are very slow, especially when you're searching for a
large number of patterns.
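For reference, here is a runnable Rust rendering of that naive approach. It is
only a sketch for illustration, not code from this crate:

```rust
// A sketch for illustration only; not this crate's code.
fn naive_find(patterns: &[&[u8]], haystack: &[u8]) -> Option<(usize, usize, usize)> {
    for i in 0..haystack.len() {
        for (id, &p) in patterns.iter().enumerate() {
            if haystack[i..].starts_with(p) {
                // Report (pattern id, start, end) of the first match found.
                return Some((id, i, i + p.len()));
            }
        }
    }
    None
}
```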
While there are many different algorithms available to solve this, a popular
one is Aho-Corasick. It's a common solution because it's not too hard to
implement, scales quite well even when searching for thousands of patterns and
is generally pretty fast. Aho-Corasick does well here because, regardless of
the number of patterns you're searching for, it always visits each byte in the
haystack exactly once. This means, generally speaking, adding more patterns to
an Aho-Corasick automaton does not make it slower. (Strictly speaking, however,
this is not true, since a larger automaton will make less effective use of the
CPU's cache.)
Aho-Corasick can be succinctly described as a trie with state transitions
between some of the nodes that efficiently instruct the search algorithm to
try matching alternative keys in the automaton. The trick is that these state
transitions are arranged such that each byte of input needs to be inspected
only once. These state transitions are typically called "failure transitions,"
because they instruct the searcher (the thing traversing the automaton while
reading from the haystack) what to do when a byte in the haystack does not
correspond to a valid transition in the current state of the trie.
More formally, a failure transition points to a state in the automaton that may
lead to a match whose prefix is a proper suffix of the path traversed through
the trie so far. (If no such proper suffix exists, then the failure transition
points back to the start state of the trie, effectively restarting the search.)
This is perhaps simpler to explain pictorally. For example, let's say we built
an Aho-Corasick automaton with the following patterns: 'abcd' and 'cef'. The
trie looks like this:
a - S1 - b - S2 - c - S3 - d - S4*
/
S0 - c - S5 - e - S6 - f - S7*
where states marked with a `*` are match states (meaning, the search algorithm
should stop and report a match to the caller).
So given this trie, it should be somewhat straightforward to see how it can
be used to determine whether any particular haystack *starts* with either
`abcd` or `cef`. It's easy to express this in code:
fn has_prefix(trie: &Trie, haystack: &[u8]) -> bool {
let mut state_id = trie.start();
// If the empty pattern is in trie, then state_id is a match state.
if trie.is_match(state_id) {
return true;
}
for (i, &b) in haystack.iter().enumerate() {
state_id = match trie.next_state(state_id, b) {
Some(id) => id,
// If there was no transition for this state and byte, then we know
// the haystack does not start with one of the patterns in our trie.
None => return false,
};
if trie.is_match(state_id) {
return true;
}
}
false
}
And that's pretty much it. All we do is move through the trie starting with the
bytes at the beginning of the haystack. If we find ourselves in a position
where we can't move, or if we've looked through the entire haystack without
seeing a match state, then we know the haystack does not start with any of the
patterns in the trie.
The meat of the Aho-Corasick algorithm is in how we add failure transitions to
our trie to keep searching efficient. Specifically, it permits us to not only
check whether a haystack *starts* with any one of a number of patterns, but
rather, whether the haystack contains any of a number of patterns *anywhere* in
the haystack.
As mentioned before, failure transitions connect a proper suffix of the path
traversed through the trie before, with a path that leads to a match that has a
prefix corresponding to that proper suffix. So in our case, for patterns `abcd`
and `cef`, with a haystack `abcef`, we want to transition to state `S5` (from
the diagram above) from `S3` upon seeing that the byte following `c` is not
`d`. Namely, the proper suffix in this example is `c`, which is a prefix of
`cef`. So the modified diagram looks like this:
a - S1 - b - S2 - c - S3 - d - S4*
/ /
/ ----------------
/ /
S0 - c - S5 - e - S6 - f - S7*
One thing that isn't shown in this diagram is that *all* states have a failure
transition, but only `S3` has a *non-trivial* failure transition. That is, all
other states have a failure transition back to the start state. So if our
haystack was `abzabcd`, then the searcher would transition back to `S0` after
seeing `z`, which effectively restarts the search. (Because there is no pattern
in our trie that has a prefix of `bz` or `z`.)
The code for traversing this *automaton* or *finite state machine* (it is no
longer just a trie) is not that much different from the `has_prefix` code
above:
fn contains(fsm: &FiniteStateMachine, haystack: &[u8]) -> bool {
let mut state_id = fsm.start();
// If the empty pattern is in fsm, then state_id is a match state.
if fsm.is_match(state_id) {
return true;
}
for (i, &b) in haystack.iter().enumerate() {
// While the diagram above doesn't show this, we may wind up needing
// to follow multiple failure transitions before we land on a state
// in which we can advance. Therefore, when searching for the next
// state, we need to loop until we don't see a failure transition.
//
// This loop terminates because the start state has no empty
// transitions. Every transition from the start state either points to
// another state, or loops back to the start state.
loop {
match fsm.next_state(state_id, b) {
Some(id) => {
state_id = id;
break;
}
// Unlike our code above, if there was no transition for this
// state, then we don't quit. Instead, we look for this state's
// failure transition and follow that instead.
None => {
state_id = fsm.next_fail_state(state_id);
}
};
}
if fsm.is_match(state_id) {
return true;
}
}
false
}
Other than the complication around traversing failure transitions, this code
is still roughly "traverse the automaton with bytes from the haystack, and quit
when a match is seen."
And that concludes our section on the basics. While we didn't go deep into
how the automaton is built (see `src/nfa.rs`, which has detailed comments about
that), the basic structure of Aho-Corasick should be reasonably clear.
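To make the construction a bit more concrete, below is a hedged sketch of the
textbook procedure: build a trie from the patterns, then assign failure
transitions with a breadth-first pass. The types and names are invented for
illustration; the crate's real construction in `src/nfa.rs` is considerably
more involved.

```rust
// A sketch for illustration only; not this crate's code.
use std::collections::VecDeque;

struct State {
    next: [Option<usize>; 256], // dense trie transitions, one slot per byte
    fail: usize,                // failure transition
    is_match: bool,
}

fn new_state() -> State {
    State { next: [None; 256], fail: 0, is_match: false }
}

fn build(patterns: &[&[u8]]) -> Vec<State> {
    // State 0 is the start state.
    let mut states = vec![new_state()];
    // Phase 1: insert every pattern into the trie.
    for pat in patterns {
        let mut cur = 0;
        for &b in pat.iter() {
            let existing = states[cur].next[b as usize];
            cur = match existing {
                Some(id) => id,
                None => {
                    let id = states.len();
                    states.push(new_state());
                    states[cur].next[b as usize] = Some(id);
                    id
                }
            };
        }
        states[cur].is_match = true;
    }
    // Phase 2: breadth-first pass that fills in failure transitions. A state's
    // failure transition is found by following its parent's failure
    // transition(s) until a state with a transition on the same byte is found,
    // falling back to the start state otherwise.
    let mut queue = VecDeque::new();
    for b in 0..256 {
        if let Some(id) = states[0].next[b] {
            states[id].fail = 0;
            queue.push_back(id);
        }
    }
    while let Some(cur) = queue.pop_front() {
        for b in 0..256 {
            let child = match states[cur].next[b] {
                Some(id) => id,
                None => continue,
            };
            let mut f = states[cur].fail;
            while f != 0 && states[f].next[b].is_none() {
                f = states[f].fail;
            }
            let fail = states[f].next[b].unwrap_or(0);
            states[child].fail = fail;
            // A state is also a match state if its failure state is one:
            // some pattern is a proper suffix of the path to `child`.
            if states[fail].is_match {
                states[child].is_match = true;
            }
            queue.push_back(child);
        }
    }
    states
}
```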
# NFAs and DFAs
There are generally two types of finite automata: non-deterministic finite
automata (NFA) and deterministic finite automata (DFA). The difference between
them is, principally, that an NFA can be in multiple states at once. This is
typically accomplished by things called _epsilon_ transitions, where one could
move to a new state without consuming any bytes from the input. (The other
mechanism by which NFAs can be in more than one state is where the same byte in
a particular state transitions to multiple distinct states.) In contrast, a DFA
can only ever be in one state at a time. A DFA has no epsilon transitions, and
for any given state, a byte transitions to at most one other state.
By this formulation, the Aho-Corasick automaton described in the previous
section is an NFA. This is because failure transitions are, effectively,
epsilon transitions. That is, whenever the automaton is in state `S`, it is
actually in the set of states that are reachable by recursively following
failure transitions from `S`. (This means that, for example, the start state
is always active since the start state is reachable via failure transitions
from any state in the automaton.)
NFAs have a lot of nice properties. They tend to be easier to construct, and
also tend to use less memory. However, their primary downside is that they are
typically slower to execute. For example, the code above showing how to search
with an Aho-Corasick automaton needs to potentially iterate through many
failure transitions for every byte of input. While this is a fairly small
amount of overhead, this can add up, especially if the automaton has a lot of
overlapping patterns with a lot of failure transitions.
A DFA's search code, by contrast, looks like this:
fn contains(dfa: &DFA, haystack: &[u8]) -> bool {
let mut state_id = dfa.start();
// If the empty pattern is in dfa, then state_id is a match state.
if dfa.is_match(state_id) {
return true;
}
for (i, &b) in haystack.iter().enumerate() {
// An Aho-Corasick DFA *never* has a missing state that requires
// failure transitions to be followed. One byte of input advances the
// automaton by one state. Always.
state_id = dfa.next_state(state_id, b);
if dfa.is_match(state_id) {
return true;
}
}
false
}
The search logic here is much simpler than for the NFA, and this tends to
translate into significant performance benefits as well, since there's a lot
less work being done for each byte in the haystack. How is this accomplished?
It's done by pre-following all failure transitions for all states for all bytes
in the alphabet, and then building a single state transition table. Building
this DFA can be much more costly than building the NFA, and use much more
memory, but the better performance can be worth it.
Users of this crate can actually choose between using an NFA or a DFA. By
default, an NFA is used, because it typically strikes the best balance between
space usage and search performance. But the DFA option is available for cases
where a little extra memory and upfront time building the automaton is okay.
For example, the `AhoCorasick::auto_configure` and
`AhoCorasickBuilder::auto_configure` methods will enable the DFA setting if
there are a small number of patterns.
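As a rough illustration of what "pre-following all failure transitions" means,
here is a self-contained sketch (again, not this crate's code) that flattens a
failure-transition automaton into a dense transition table:

```rust
// A sketch for illustration only; not this crate's code.
struct NfaState {
    next: [Option<usize>; 256], // trie transitions
    fail: usize,                // failure transition
}

fn determinize(nfa: &[NfaState]) -> Vec<usize> {
    // Row-major table: 256 transitions per state; state 0 is the start state.
    let mut table = vec![0usize; nfa.len() * 256];
    for id in 0..nfa.len() {
        for b in 0..256 {
            // Follow failure transitions until some state has a real
            // transition on `b`, falling back to the start state.
            let mut cur = id;
            table[id * 256 + b] = loop {
                if let Some(next) = nfa[cur].next[b] {
                    break next;
                }
                if cur == 0 {
                    break 0;
                }
                cur = nfa[cur].fail;
            };
        }
    }
    table
}
```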
# More DFA tricks
As described in the previous section, one of the downsides of using a DFA is
that it uses more memory and can take longer to build. One small way of
mitigating these concerns is to map the alphabet used by the automaton into a
smaller space. Typically, the alphabet of a DFA has 256 elements in it: one
element for each possible value that fits into a byte. However, in many cases,
one does not need the full alphabet. For example, if all patterns in an
Aho-Corasick automaton are ASCII letters, then this only uses up 52 distinct
bytes. As far as the automaton is concerned, the rest of the 204 bytes are
indistinguishable from one another: they will never disrciminate between a
match or a non-match. Therefore, in cases like that, the alphabet can be shrunk
to just 53 elements. One for each ASCII letter, and then another to serve as a
placeholder for every other unused byte.
In practice, this library doesn't quite compute the optimal set of equivalence
classes, but it's close enough in most cases. The key idea is that this then
allows the transition table for the DFA to be potentially much smaller. The
downside of doing this, however, is that since the transition table is defined
in terms of this smaller alphabet space, every byte in the haystack must be
re-mapped to this smaller space. This requires an additional 256-byte table.
In practice, this can lead to a small search time hit, but it can be difficult
to measure. Moreover, it can sometimes lead to faster search times for bigger
automata, since it could be the difference between more parts of the automaton
staying in the CPU cache or not.
One other trick for DFAs employed by this crate is the notion of premultiplying
state identifiers. Specifically, the normal way to compute the next transition
in a DFA is via the following (assuming that the transition table is laid out
sequentially in memory, in row-major order, where the rows are states):
next_state_id = dfa.transitions[current_state_id * 256 + current_byte]
However, since the value `256` is a fixed constant, we can actually premultiply
the state identifiers in the table when we build the table initially. Then, the
next transition computation simply becomes:
next_state_id = dfa.transitions[current_state_id + current_byte]
This doesn't seem like much, but when this is being executed for every byte of
input that you're searching, saving that extra multiplication instruction can
add up.
The same optimization works even when equivalence classes are enabled, as
described above. The only difference is that the premultiplication is by the
total number of equivalence classes instead of 256.
There isn't much downside to premultiplying state identifiers, other than the
fact that you may need to choose a bigger integer representation than you would
otherwise. For example, if you don't premultiply state identifiers, then an
automaton that uses `u8` as a state identifier can hold up to 256 states.
However, if they are premultiplied, then it can only hold up to
`floor(256 / len(alphabet))` states. Thus premultiplication impacts how compact
your DFA can be. In practice, it's pretty rare to use `u8` as a state
identifier, so premultiplication is usually a good thing to do.
Both equivalence classes and premultiplication are tuneable parameters via the
`AhoCorasickBuilder` type, and both are enabled by default.
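As a rough sketch of how the two tricks compose at search time (the type and
field names below are illustrative, not this crate's internals):

```rust
// A sketch of a DFA transition lookup combining byte equivalence classes with
// premultiplied state identifiers. Names are illustrative only.
struct Dfa {
    /// Maps every possible haystack byte to its equivalence class.
    byte_classes: [u8; 256],
    /// Row-major table with one row per state and one column per class.
    /// State identifiers stored here are premultiplied, i.e. already equal
    /// to `state_index * number_of_classes`.
    transitions: Vec<u32>,
}

impl Dfa {
    #[inline]
    fn next_state(&self, premultiplied_id: u32, byte: u8) -> u32 {
        let class = self.byte_classes[byte as usize] as u32;
        // No multiplication here: the premultiplied ID already points at the
        // start of its row, so a single addition finds the right column.
        self.transitions[(premultiplied_id + class) as usize]
    }
}
```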
# Match semantics
One of the more interesting things about this implementation of Aho-Corasick
that (as far as this author knows) separates it from other implementations, is
that it natively supports leftmost-first and leftmost-longest match semantics.
Briefly, match semantics refer to the decision procedure by which searching
will disambiguate matches when there are multiple to choose from:
* **standard** match semantics emits matches as soon as they are detected by
the automaton. This is typically equivalent to the textbook non-overlapping
formulation of Aho-Corasick.
* **leftmost-first** match semantics means that 1) the next match is the match
starting at the leftmost position and 2) among multiple matches starting at
the same leftmost position, the match corresponding to the pattern provided
first by the caller is reported.
* **leftmost-longest** is like leftmost-first, except when there are multiple
matches starting at the same leftmost position, the pattern corresponding to
the longest match is returned.
(The crate API documentation discusses these differences, with examples, in
more depth on the `MatchKind` type.)
The reason why supporting these match semantics is important is because it
gives the user more control over the match procedure. For example,
leftmost-first permits users to implement match priority by simply putting the
higher priority patterns first. Leftmost-longest, on the other hand, permits
finding the longest possible match, which might be useful when trying to find
words matching a dictionary. Additionally, regex engines often want to use
Aho-Corasick as an optimization when searching for an alternation of literals.
In order to preserve correct match semantics, regex engines typically can't use
the standard textbook definition directly, since regex engines will implement
either leftmost-first (Perl-like) or leftmost-longest (POSIX) match semantics.
Supporting leftmost semantics requires a couple key changes:
* Constructing the Aho-Corasick automaton changes a bit in both how the trie is
constructed and how failure transitions are found. Namely, only a subset of
the failure transitions are added. Specifically, only the failure transitions
that either do not occur after a match or do occur after a match but preserve
that match are kept. (More details on this can be found in `src/nfa.rs`.)
* The search algorithm changes slightly. Since we are looking for the leftmost
match, we cannot quit as soon as a match is detected. Instead, after a match
is detected, we must keep searching until either the end of the input or
until a dead state is seen. (Dead states are not used for standard match
semantics. Dead states mean that searching should stop after a match has been
found.)
Other implementations of Aho-Corasick do support leftmost match semantics, but
they do it with more overhead at search time, or even worse, with a queue of
matches and sophisticated hijinks to disambiguate the matches. While our
construction algorithm becomes a bit more complicated, the correct match
semantics fall out from the structure of the automaton itself.
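For a concrete feel of how the semantics differ through the public API, here
is a small usage sketch built on the same `AhoCorasickBuilder` calls shown in
the README:

```rust
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};

fn main() {
    let patterns = &["Sam", "Samwise"];
    let haystack = "Samwise";

    // Standard semantics: report a match as soon as one is detected.
    let ac = AhoCorasick::new(patterns);
    let mat = ac.find(haystack).unwrap();
    assert_eq!("Sam", &haystack[mat.start()..mat.end()]);

    // Leftmost-first: among matches starting at the same position, the
    // pattern given first by the caller wins ("Sam" is listed first here).
    let ac = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostFirst)
        .build(patterns);
    let mat = ac.find(haystack).unwrap();
    assert_eq!("Sam", &haystack[mat.start()..mat.end()]);

    // Leftmost-longest: among matches starting at the same position, the
    // longest one wins.
    let ac = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostLongest)
        .build(patterns);
    let mat = ac.find(haystack).unwrap();
    assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
}
```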
# Overlapping matches
One of the nice properties of an Aho-Corasick automaton is that it can report
all possible matches, even when they overlap with one another. In this mode,
the match semantics don't matter, since all possible matches are reported.
Overlapping searches work just like regular searches, except the state
identifier at which the previous search left off is carried over to the next
search, so that it can pick up where it left off. If there are additional
matches at that state, then they are reported before resuming the search.
Enabling leftmost-first or leftmost-longest match semantics causes the
automaton to use a subset of all failure transitions, which means that
overlapping searches cannot be used. Therefore, if leftmost match semantics are
used, attempting to do an overlapping search will panic. Thus, to get
overlapping searches, the caller must use the default standard match semantics.
This behavior was chosen because there are only two alternatives, which were
deemed worse:
* Compile two automatons internally, one for standard semantics and one for
the semantics requested by the caller (if not standard).
* Create a new type, distinct from the `AhoCorasick` type, which has different
capabilities based on the configuration options.
The first is untenable because of the amount of memory used by the automaton.
The second increases the complexity of the API too much by adding too many
types that do similar things. It is conceptually much simpler to keep all
searching isolated to a single type. Callers may query whether the automaton
supports overlapping searches via the `AhoCorasick::supports_overlapping`
method.
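A short usage sketch of an overlapping search through the public API (the
expected pattern indices below assume the patterns are listed in the order
shown):

```rust
use aho_corasick::AhoCorasick;

fn main() {
    let patterns = &["append", "appendage", "app"];
    let haystack = "append the app to the appendage";
    let ac = AhoCorasick::new(patterns);
    // Overlapping searches are only available with the default standard
    // match semantics.
    assert!(ac.supports_overlapping());
    let pattern_ids: Vec<usize> =
        ac.find_overlapping_iter(haystack).map(|m| m.pattern()).collect();
    // Every occurrence is reported, including "app" inside each "append".
    assert_eq!(pattern_ids, vec![2, 0, 2, 2, 0, 1]);
}
```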
# Stream searching
Since Aho-Corasick is an automaton, it is possible to do partial searches on
partial parts of the haystack, and then resume that search on subsequent pieces
of the haystack. This is useful when the haystack you're trying to search is
not stored contiguous in memory, or if one does not want to read the entire
haystack into memory at once.
Currently, only standard semantics are supported for stream searching. This is
some of the more complicated code in this crate, and is something I would very
much like to improve. In particular, it currently has the restriction that it
must buffer at least enough of the haystack in memory in order to fit the
longest possible match. The difficulty in getting stream searching right is
that the implementation choices (such as the buffer size) often impact what the
API looks like and what it's allowed to do.
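Here is a small usage sketch of the streaming API (standard semantics, which
is the default), using an in-memory reader to stand in for a file:

```rust
use aho_corasick::AhoCorasick;

fn main() -> Result<(), std::io::Error> {
    let patterns = &["foo", "bar"];
    // Any `std::io::Read` works here; a byte slice stands in for a file.
    let rdr: &[u8] = b"a foo, a bar, another foo";
    let ac = AhoCorasick::new(patterns);
    let mut found = vec![];
    for result in ac.stream_find_iter(rdr) {
        let mat = result?;
        found.push((mat.pattern(), mat.start(), mat.end()));
    }
    assert_eq!(found, vec![(0, 2, 5), (1, 9, 12), (0, 22, 25)]);
    Ok(())
}
```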
# Prefilters
In some cases, Aho-Corasick is not the fastest way to find matches containing
multiple patterns. Sometimes, the search can be accelerated using highly
optimized SIMD routines. For example, consider searching the following
patterns:
Sherlock
Moriarty
Watson
It is plausible that it would be much faster to quickly look for occurrences of
the leading bytes, `S`, `M` or `W`, before trying to start searching via the
automaton. Indeed, this is exactly what this crate will do.
When there are more than three distinct starting bytes, this crate will
look for three distinct bytes occurring at any position in the patterns, while
preferring bytes that are heuristically determined to be rare over others. For
example:
Abuzz
Sanchez
Vasquez
Topaz
Waltz
Here, we have more than 3 distinct starting bytes, but all of the patterns
contain `z`, which is typically a rare byte. In this case, the prefilter will
scan for `z`, back up a bit, and then execute the Aho-Corasick automaton.
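To illustrate the idea (and only the idea; this is not the crate's prefilter
code), a rare-byte scan might look roughly like the following, where
`rare_offset_max` is a hypothetical parameter for how far into a pattern the
rare byte can sit:

```rust
// A sketch of the idea only; not this crate's prefilter implementation.
use memchr::memchr;

fn candidate_starts(haystack: &[u8], rare: u8, rare_offset_max: usize) -> Vec<usize> {
    let mut starts = vec![];
    let mut at = 0;
    while let Some(i) = memchr(rare, &haystack[at..]) {
        let pos = at + i;
        // The rare byte may sit anywhere inside a pattern, so back up by the
        // largest offset at which it occurs in any pattern before handing the
        // position to the automaton for confirmation.
        starts.push(pos.saturating_sub(rare_offset_max));
        at = pos + 1;
    }
    starts
}
```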
If all of that fails, then a packed multiple substring algorithm will be
attempted. Currently, the only algorithm available for this is Teddy, but more
may be added in the future. Teddy is unlike the above prefilters in that it
confirms its own matches, so when Teddy is active, it might not be necessary
for Aho-Corasick to run at all. (See `Automaton::leftmost_find_at_no_state_imp`
in `src/automaton.rs`.) However, the current Teddy implementation only works
in `x86_64` and when SSSE3 or AVX2 are available, and moreover, only works
_well_ when there are a small number of patterns (say, less than 100). Teddy
also requires the haystack to be of a certain length (more than 16-34 bytes).
When the haystack is shorter than that, Rabin-Karp is used instead. (See
`src/packed/rabinkarp.rs`.)
There is a more thorough description of Teddy at
[`src/packed/teddy/README.md`](src/packed/teddy/README.md).


@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2015 Andrew Gallant
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


@ -0,0 +1,184 @@
aho-corasick
============
A library for finding occurrences of many patterns at once with SIMD
acceleration in some cases. This library provides multiple pattern
search principally through an implementation of the
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
which builds a finite state machine for executing searches in linear time.
Features include case insensitive matching, overlapping matches and search &
replace in streams.
[![Linux build status](https://api.travis-ci.org/BurntSushi/aho-corasick.svg)](https://travis-ci.org/BurntSushi/aho-corasick)
[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/aho-corasick?svg=true)](https://ci.appveyor.com/project/BurntSushi/aho-corasick)
[![](http://meritbadge.herokuapp.com/aho-corasick)](https://crates.io/crates/aho-corasick)
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
### Documentation
https://docs.rs/aho-corasick
### Usage
Add this to your `Cargo.toml`:
```toml
[dependencies]
aho-corasick = "0.7"
```
and this to your crate root (if you're using Rust 2015):
```rust
extern crate aho_corasick;
```
### Example: basic searching
This example shows how to search for occurrences of multiple patterns
simultaneously. Each match includes the pattern that matched along with the
byte offsets of the match.
```rust
use aho_corasick::AhoCorasick;
let patterns = &["apple", "maple", "Snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasick::new(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
### Example: case insensitivity
This is like the previous example, but matches `Snapple` case insensitively
using `AhoCorasickBuilder`:
```rust
use aho_corasick::AhoCorasickBuilder;
let patterns = &["apple", "maple", "snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
.build(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
### Example: replacing matches in a stream
This example shows how to execute a search and replace on a stream without
loading the entire stream into memory first.
```rust
use aho_corasick::AhoCorasick;
# fn example() -> Result<(), ::std::io::Error> {
let patterns = &["fox", "brown", "quick"];
let replace_with = &["sloth", "grey", "slow"];
// In a real example, these might be `std::fs::File`s instead. All you need to
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
let rdr = "The quick brown fox.";
let mut wtr = vec![];
let ac = AhoCorasick::new(patterns);
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
# Ok(()) }; example().unwrap()
```
### Example: finding the leftmost first match
In the textbook description of Aho-Corasick, its formulation is typically
structured such that it reports all possible matches, even when they overlap
with one another. In many cases, overlapping matches may not be desired, such as
the case of finding all successive non-overlapping matches like you might with
a standard regular expression.
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
this doesn't always work in the expected way, since it will report matches as
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
against the text `Samwise`. Most regex engines (that are Perl-like, or
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
algorithm modified for reporting non-overlapping matches will report `Sam`.
A novel contribution of this library is the ability to change the match
semantics of Aho-Corasick (without additional search time overhead) such that
`Samwise` is reported instead. For example, here's the standard approach:
```rust
use aho_corasick::AhoCorasick;
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasick::new(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
```
And now here's the leftmost-first version, which matches how a Perl-like
regex will work:
```rust
use aho_corasick::{AhoCorasickBuilder, MatchKind};
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.build(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
```
In addition to leftmost-first semantics, this library also supports
leftmost-longest semantics, which match the POSIX behavior of a regular
expression alternation. See `MatchKind` in the docs for more details.
### Minimum Rust version policy
This crate's minimum supported `rustc` version is `1.28.0`.
In general, this crate will be conservative with respect to the minimum
supported version of Rust. It will follow the `regex` crate's policy, since
`regex` is an important dependent.
### Future work
Here are some plans for the future:
* Assuming the current API is sufficient, I'd like to commit to it and release
a `1.0` version of this crate some time in the next 6-12 months.
* Support stream searching with leftmost match semantics. Currently, only
standard match semantics are supported. Getting this right seems possible,
but is tricky since the match state needs to be propagated through multiple
searches. (With standard semantics, as soon as a match is seen the search
ends.)


@ -0,0 +1,24 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>


@ -0,0 +1,2 @@
max_width = 79
use_small_heuristics = "max"

File diff suppressed because it is too large


@ -0,0 +1,589 @@
use ahocorasick::MatchKind;
use prefilter::{self, Candidate, Prefilter, PrefilterState};
use state_id::{dead_id, fail_id, StateID};
use Match;
// NOTE: This trait essentially started as a copy of the same trait from
// regex-automata, with some wording changed since we use this trait for
// NFAs in addition to DFAs in this crate. Additionally, we do not export
// this trait. It's only used internally to reduce code duplication. The
// regex-automata crate needs to expose it because its Regex type is generic
// over implementations of this trait. In this crate, we encapsulate everything
// behind the AhoCorasick type.
//
// This trait is a bit of a mess, but it's not quite clear how to fix it.
// Basically, there are several competing concerns:
//
// * We need performance, so everything effectively needs to get monomorphized.
// * There are several variations on searching Aho-Corasick automatons:
// overlapping, standard and leftmost. Overlapping and standard are somewhat
// combined together below, but there is no real way to combine standard with
// leftmost. Namely, leftmost requires continuing a search even after a match
// is found, in order to correctly disambiguate a match.
// * On top of that, *sometimes* callers want to know which state the automaton
// is in after searching. This is principally useful for overlapping and
// stream searches. However, when callers don't care about this, we really
// do not want to be forced to compute it, since it sometimes requires extra
// work. Thus, there are effectively two copies of leftmost searching: one
// for tracking the state ID and one that doesn't. We should ideally do the
// same for standard searching, but my sanity stopped me.
/// A trait describing the interface of an Aho-Corasick finite state machine.
///
/// Every automaton has exactly one fail state, one dead state and exactly one
/// start state. Generally, these correspond to the first, second and third
/// states, respectively. The failure state is always treated as a sentinel.
/// That is, no correct Aho-Corasick automaton will ever transition into the
/// fail state. The dead state, however, can be transitioned into, but only
/// when leftmost-first or leftmost-longest match semantics are enabled and
/// only when at least one match has been observed.
///
/// Every automaton also has one or more match states, such that
/// `Automaton::is_match_state_unchecked(id)` returns `true` if and only if
/// `id` corresponds to a match state.
pub trait Automaton {
/// The representation used for state identifiers in this automaton.
///
/// Typically, this is one of `u8`, `u16`, `u32`, `u64` or `usize`.
type ID: StateID;
/// The type of matching that should be done.
fn match_kind(&self) -> &MatchKind;
/// Returns true if and only if this automaton uses anchored searches.
fn anchored(&self) -> bool;
/// An optional prefilter for quickly skipping to the next candidate match.
/// A prefilter must report at least every match, although it may report
/// positions that do not correspond to a match. That is, it must not allow
/// false negatives, but can allow false positives.
///
/// Currently, a prefilter only runs when the automaton is in the start
/// state. That is, the position reported by a prefilter should always
/// correspond to the start of a potential match.
fn prefilter(&self) -> Option<&dyn Prefilter>;
/// Return the identifier of this automaton's start state.
fn start_state(&self) -> Self::ID;
/// Returns true if and only if the given state identifier refers to a
/// valid state.
fn is_valid(&self, id: Self::ID) -> bool;
/// Returns true if and only if the given identifier corresponds to a match
/// state.
///
/// The state ID given must be valid, or else implementors may panic.
fn is_match_state(&self, id: Self::ID) -> bool;
/// Returns true if and only if the given identifier corresponds to a state
/// that is either the dead state or a match state.
///
/// Depending on the implementation of the automaton, this routine can
/// be used to save a branch in the core matching loop. Nevertheless,
/// `is_match_state(id) || id == dead_id()` is always a valid
/// implementation. Indeed, this is the default implementation.
///
/// The state ID given must be valid, or else implementors may panic.
fn is_match_or_dead_state(&self, id: Self::ID) -> bool {
id == dead_id() || self.is_match_state(id)
}
/// If the given state is a match state, return the match corresponding
/// to the given match index. `end` must be the ending position of the
/// detected match. If no match exists or if `match_index` exceeds the
/// number of matches in this state, then `None` is returned.
///
/// The state ID given must be valid, or else implementors may panic.
///
/// If the given state ID is correct and if the `match_index` is less than
/// the number of matches for that state, then this is guaranteed to return
/// a match.
fn get_match(
&self,
id: Self::ID,
match_index: usize,
end: usize,
) -> Option<Match>;
/// Returns the number of matches for the given state. If the given state
/// is not a match state, then this returns 0.
///
/// The state ID given must be valid, or else implementors may panic.
fn match_count(&self, id: Self::ID) -> usize;
/// Given the current state that this automaton is in and the next input
/// byte, this method returns the identifier of the next state. The
/// identifier returned must always be valid and may never correspond to
/// the fail state. The returned identifier may, however, point to the
/// dead state.
///
/// This is not safe so that implementors may look up the next state
/// without memory safety checks such as bounds checks. As such, callers
/// must ensure that the given identifier corresponds to a valid automaton
/// state. Implementors must, in turn, ensure that this routine is safe for
/// all valid state identifiers and for all possible `u8` values.
unsafe fn next_state_unchecked(
&self,
current: Self::ID,
input: u8,
) -> Self::ID;
/// Like next_state_unchecked, but debug_asserts that the underlying
/// implementation never returns a `fail_id()` for the next state.
unsafe fn next_state_unchecked_no_fail(
&self,
current: Self::ID,
input: u8,
) -> Self::ID {
let next = self.next_state_unchecked(current, input);
// We should never see a transition to the failure state.
debug_assert!(
next != fail_id(),
"automaton should never return fail_id for next state"
);
next
}
/// Execute a search using standard match semantics.
///
/// This can be used even when the automaton was constructed with leftmost
/// match semantics when you want to find the earliest possible match. This
/// can also be used as part of an overlapping search implementation.
///
/// N.B. This does not report a match if `state_id` is given as a matching
/// state. As such, this should not be used directly.
#[inline(always)]
fn standard_find_at(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
state_id: &mut Self::ID,
) -> Option<Match> {
if let Some(pre) = self.prefilter() {
self.standard_find_at_imp(
prestate,
Some(pre),
haystack,
at,
state_id,
)
} else {
self.standard_find_at_imp(prestate, None, haystack, at, state_id)
}
}
// It's important for this to always be inlined. Namely, its only caller
// is standard_find_at, and the inlining should remove the case analysis
// for prefilter scanning when there is no prefilter available.
#[inline(always)]
fn standard_find_at_imp(
&self,
prestate: &mut PrefilterState,
prefilter: Option<&dyn Prefilter>,
haystack: &[u8],
at: usize,
state_id: &mut Self::ID,
) -> Option<Match> {
// This is necessary for guaranteeing a safe API, since we use the
// state ID below in a function that exhibits UB if called with an
// invalid state ID.
assert!(
self.is_valid(*state_id),
"{} is not a valid state ID",
state_id.to_usize()
);
unsafe {
let start = haystack.as_ptr();
let end = haystack[haystack.len()..].as_ptr();
let mut ptr = haystack[at..].as_ptr();
while ptr < end {
if let Some(pre) = prefilter {
let at = ptr as usize - start as usize;
if prestate.is_effective(at)
&& *state_id == self.start_state()
{
let c = prefilter::next(prestate, pre, haystack, at)
.into_option();
match c {
None => return None,
Some(i) => {
ptr = start.offset(i as isize);
}
}
}
}
// SAFETY: next_state is safe for all possible u8 values,
// so the only thing we're concerned about is the validity
// of `state_id`. `state_id` either comes from the caller
// (in which case, we assert above that it is valid), or it
// comes from the return value of next_state, which is also
// guaranteed to be valid.
*state_id = self.next_state_unchecked_no_fail(*state_id, *ptr);
ptr = ptr.offset(1);
// This routine always quits immediately after seeing a
// match, and since dead states can only come after seeing
// a match, seeing a dead state here is impossible. (Unless
// we have an anchored automaton, in which case, dead states
// are used to stop a search.)
debug_assert!(
*state_id != dead_id() || self.anchored(),
"standard find should never see a dead state"
);
if self.is_match_or_dead_state(*state_id) {
return if *state_id == dead_id() {
None
} else {
let end = ptr as usize - start as usize;
self.get_match(*state_id, 0, end)
};
}
}
None
}
}
/// Execute a search using leftmost (either first or longest) match
/// semantics.
///
/// The principal difference between searching with standard semantics and
/// searching with leftmost semantics is that leftmost searching will
/// continue searching even after a match has been found. Once a match
/// is found, the search does not stop until either the haystack has been
/// exhausted or a dead state is observed in the automaton. (Dead states
/// only exist in automatons constructed with leftmost semantics.) That is,
/// we rely on the construction of the automaton to tell us when to quit.
#[inline(never)]
fn leftmost_find_at(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
state_id: &mut Self::ID,
) -> Option<Match> {
if let Some(pre) = self.prefilter() {
self.leftmost_find_at_imp(
prestate,
Some(pre),
haystack,
at,
state_id,
)
} else {
self.leftmost_find_at_imp(prestate, None, haystack, at, state_id)
}
}
// It's important for this to always be inlined. Namely, its only caller
// is leftmost_find_at, and the inlining should remove the case analysis
// for prefilter scanning when there is no prefilter available.
#[inline(always)]
fn leftmost_find_at_imp(
&self,
prestate: &mut PrefilterState,
prefilter: Option<&dyn Prefilter>,
haystack: &[u8],
at: usize,
state_id: &mut Self::ID,
) -> Option<Match> {
debug_assert!(self.match_kind().is_leftmost());
// This is necessary for guaranteeing a safe API, since we use the
// state ID below in a function that exhibits UB if called with an
// invalid state ID.
assert!(
self.is_valid(*state_id),
"{} is not a valid state ID",
state_id.to_usize()
);
if self.anchored() && at > 0 && *state_id == self.start_state() {
return None;
}
unsafe {
let start = haystack.as_ptr();
let end = haystack[haystack.len()..].as_ptr();
let mut ptr = haystack[at..].as_ptr();
let mut last_match = self.get_match(*state_id, 0, at);
while ptr < end {
if let Some(pre) = prefilter {
let at = ptr as usize - start as usize;
if prestate.is_effective(at)
&& *state_id == self.start_state()
{
let c = prefilter::next(prestate, pre, haystack, at)
.into_option();
match c {
None => return None,
Some(i) => {
ptr = start.offset(i as isize);
}
}
}
}
// SAFETY: next_state is safe for all possible u8 values,
// so the only thing we're concerned about is the validity
// of `state_id`. `state_id` either comes from the caller
// (in which case, we assert above that it is valid), or it
// comes from the return value of next_state, which is also
// guaranteed to be valid.
*state_id = self.next_state_unchecked_no_fail(*state_id, *ptr);
ptr = ptr.offset(1);
if self.is_match_or_dead_state(*state_id) {
if *state_id == dead_id() {
// The only way to enter into a dead state is if a
// match has been found, so we assert as much. This
// is different from normal automata, where you might
// enter a dead state if you know a subsequent match
// will never be found (regardless of whether a match
// has already been found). For Aho-Corasick, it is
// built so that we can match at any position, so the
// possibility of a match always exists.
//
// (Unless we have an anchored automaton, in which
// case, dead states are used to stop a search.)
debug_assert!(
last_match.is_some() || self.anchored(),
"failure state should only be seen after match"
);
return last_match;
}
let end = ptr as usize - start as usize;
last_match = self.get_match(*state_id, 0, end);
}
}
last_match
}
}
/// This is like leftmost_find_at, but does not need to track a caller
/// provided state id. In other words, the only output of this routine is a
/// match, if one exists.
///
/// It is regrettable that we need to effectively copy a chunk of
/// implementation twice, but when we don't need to track the state ID, we
/// can allow the prefilter to report matches immediately without having
/// to re-confirm them with the automaton. The re-confirmation step is
/// necessary in leftmost_find_at because tracing through the automaton is
/// the only way to correctly set the state ID. (Perhaps an alternative
/// would be to keep a map from pattern ID to matching state ID, but that
/// complicates the code and still doesn't permit us to defer to the
/// prefilter entirely when possible.)
///
/// I did try a few things to avoid the code duplication here, but nothing
/// optimized as well as this approach. (In microbenchmarks, there was
/// about a 25% difference.)
#[inline(never)]
fn leftmost_find_at_no_state(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Option<Match> {
if let Some(pre) = self.prefilter() {
self.leftmost_find_at_no_state_imp(
prestate,
Some(pre),
haystack,
at,
)
} else {
self.leftmost_find_at_no_state_imp(prestate, None, haystack, at)
}
}
// It's important for this to always be inlined. Namely, its only caller
// is leftmost_find_at_no_state, and the inlining should remove the case
// analysis for prefilter scanning when there is no prefilter available.
#[inline(always)]
fn leftmost_find_at_no_state_imp(
&self,
prestate: &mut PrefilterState,
prefilter: Option<&dyn Prefilter>,
haystack: &[u8],
at: usize,
) -> Option<Match> {
debug_assert!(self.match_kind().is_leftmost());
if self.anchored() && at > 0 {
return None;
}
// If our prefilter handles confirmation of matches 100% of the
// time, and since we don't need to track state IDs, we can avoid
// Aho-Corasick completely.
if let Some(pre) = prefilter {
// We should never have a prefilter during an anchored search.
debug_assert!(!self.anchored());
if !pre.reports_false_positives() {
return match pre.next_candidate(prestate, haystack, at) {
Candidate::None => None,
Candidate::Match(m) => Some(m),
Candidate::PossibleStartOfMatch(_) => unreachable!(),
};
}
}
let mut state_id = self.start_state();
unsafe {
let start = haystack.as_ptr();
let end = haystack[haystack.len()..].as_ptr();
let mut ptr = haystack[at..].as_ptr();
let mut last_match = self.get_match(state_id, 0, at);
while ptr < end {
if let Some(pre) = prefilter {
let at = ptr as usize - start as usize;
if prestate.is_effective(at)
&& state_id == self.start_state()
{
match prefilter::next(prestate, pre, haystack, at) {
Candidate::None => return None,
// Since we aren't tracking a state ID, we can
// quit early once we know we have a match.
Candidate::Match(m) => return Some(m),
Candidate::PossibleStartOfMatch(i) => {
ptr = start.offset(i as isize);
}
}
}
}
// SAFETY: next_state is safe for all possible u8 values,
// so the only thing we're concerned about is the validity
// of `state_id`. `state_id` either comes from the caller
// (in which case, we assert above that it is valid), or it
// comes from the return value of next_state, which is also
// guaranteed to be valid.
state_id = self.next_state_unchecked_no_fail(state_id, *ptr);
ptr = ptr.offset(1);
if self.is_match_or_dead_state(state_id) {
if state_id == dead_id() {
// The only way to enter into a dead state is if a
// match has been found, so we assert as much. This
// is different from normal automata, where you might
// enter a dead state if you know a subsequent match
// will never be found (regardless of whether a match
// has already been found). For Aho-Corasick, it is
// built so that we can match at any position, so the
// possibility of a match always exists.
//
// (Unless we have an anchored automaton, in which
// case, dead states are used to stop a search.)
debug_assert!(
last_match.is_some() || self.anchored(),
"failure state should only be seen after match"
);
return last_match;
}
let end = ptr as usize - start as usize;
last_match = self.get_match(state_id, 0, end);
}
}
last_match
}
}
/// Execute an overlapping search.
///
/// When executing an overlapping match, the previous state ID in addition
/// to the previous match index should be given. If there are more matches
/// at the given state, then the match is reported and the given index is
/// incremented.
#[inline(always)]
fn overlapping_find_at(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
state_id: &mut Self::ID,
match_index: &mut usize,
) -> Option<Match> {
if self.anchored() && at > 0 && *state_id == self.start_state() {
return None;
}
let match_count = self.match_count(*state_id);
if *match_index < match_count {
// This is guaranteed to return a match since
// match_index < match_count.
let result = self.get_match(*state_id, *match_index, at);
debug_assert!(result.is_some(), "must be a match");
*match_index += 1;
return result;
}
*match_index = 0;
match self.standard_find_at(prestate, haystack, at, state_id) {
None => None,
Some(m) => {
*match_index = 1;
Some(m)
}
}
}
/// Return the earliest match found. This returns as soon as we know that
/// we have a match. As such, this does not necessarily correspond to the
/// leftmost starting match, but rather, the leftmost position at which a
/// match ends.
#[inline(always)]
fn earliest_find_at(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
state_id: &mut Self::ID,
) -> Option<Match> {
if *state_id == self.start_state() {
if self.anchored() && at > 0 {
return None;
}
if let Some(m) = self.get_match(*state_id, 0, at) {
return Some(m);
}
}
self.standard_find_at(prestate, haystack, at, state_id)
}
/// A convenience function for finding the next match according to the
/// match semantics of this automaton. For standard match semantics, this
/// finds the earliest match. Otherwise, the leftmost match is found.
#[inline(always)]
fn find_at(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
state_id: &mut Self::ID,
) -> Option<Match> {
match *self.match_kind() {
MatchKind::Standard => {
self.earliest_find_at(prestate, haystack, at, state_id)
}
MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => {
self.leftmost_find_at(prestate, haystack, at, state_id)
}
MatchKind::__Nonexhaustive => unreachable!(),
}
}
/// Like find_at, but does not track state identifiers. This permits some
/// optimizations when a prefilter that confirms its own matches is
/// present.
#[inline(always)]
fn find_at_no_state(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Option<Match> {
match *self.match_kind() {
MatchKind::Standard => {
let mut state = self.start_state();
self.earliest_find_at(prestate, haystack, at, &mut state)
}
MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => {
self.leftmost_find_at_no_state(prestate, haystack, at)
}
MatchKind::__Nonexhaustive => unreachable!(),
}
}
}
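// NOTE: illustrative sketch, not part of the upstream crate. This helper
// shows how `overlapping_find_at` is intended to be driven: the caller
// threads the state ID and match index through successive calls (roughly as
// this crate's overlapping iterator does), resuming each call at the end of
// the previously reported match.
#[cfg(test)]
#[allow(dead_code)]
fn collect_overlapping_sketch<A: Automaton>(
    aut: &A,
    prestate: &mut PrefilterState,
    haystack: &[u8],
) -> Vec<Match> {
    let mut matches = vec![];
    let mut state_id = aut.start_state();
    let mut match_index = 0;
    let mut at = 0;
    while let Some(m) = aut.overlapping_find_at(
        prestate,
        haystack,
        at,
        &mut state_id,
        &mut match_index,
    ) {
        // The state ID and match index carry the rest of the iteration
        // state, so we only need to advance the search position.
        at = m.end();
        matches.push(m);
    }
    matches
}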

View File

@ -0,0 +1,128 @@
use std::cmp;
use std::io;
use std::ptr;
/// The default buffer capacity that we use for the stream buffer.
const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB
/// A fairly simple roll buffer for supporting stream searches.
///
/// This buffer acts as a temporary place to store a fixed amount of data when
/// reading from a stream. Its central purpose is to allow "rolling" some
/// suffix of the data to the beginning of the buffer before refilling it with
/// more data from the stream. For example, let's say we are trying to match
/// "foobar" on a stream. When we report the match, we'd like to not only
/// report the correct offsets at which the match occurs, but also the matching
/// bytes themselves. So let's say our stream is a file with the following
/// contents: `test test foobar test test`. Now assume that we happen to read
/// the aforementioned file in two chunks: `test test foo` and `bar test test`.
/// Naively, it would not be possible to report a single contiguous `foobar`
/// match, but this roll buffer allows us to do that. Namely, after the second
/// read, the contents of the buffer should be `st foobar test test`, where the
/// search should ultimately resume immediately after `foo`. (The prefix `st `
/// is included because the roll buffer saves N bytes at the end of the buffer,
/// where N is the maximum possible length of a match.)
///
/// A lot of the logic for dealing with this is unfortunately split out between
/// this roll buffer and the `StreamChunkIter`.
#[derive(Debug)]
pub struct Buffer {
/// The raw buffer contents. This has a fixed size and never increases.
buf: Vec<u8>,
/// The minimum size of the buffer, which is equivalent to the maximum
/// possible length of a match. This corresponds to the amount that we
/// roll to the front of the buffer before refilling it.
min: usize,
/// The end of the contents of this buffer.
end: usize,
}
impl Buffer {
/// Create a new buffer for stream searching. The minimum buffer length
/// given should be the size of the maximum possible match length.
pub fn new(min_buffer_len: usize) -> Buffer {
let min = cmp::max(1, min_buffer_len);
// The minimum buffer amount is also the amount that we roll our
// buffer in order to support incremental searching. To this end,
// our actual capacity needs to be at least 1 byte bigger than our
// minimum amount, otherwise we won't have any overlap. In actuality,
// we want our buffer to be a bit bigger than that for performance
// reasons, so we set a lower bound of `8 * min`.
//
// TODO: It would be good to find a way to test the streaming
// implementation with the minimal buffer size.
let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
Buffer { buf: vec![0; capacity], min, end: 0 }
}
/// Return the contents of this buffer.
#[inline]
pub fn buffer(&self) -> &[u8] {
&self.buf[..self.end]
}
/// Return the minimum size of the buffer. The only way a buffer may be
/// smaller than this is if the stream itself contains less than the
/// minimum buffer amount.
#[inline]
pub fn min_buffer_len(&self) -> usize {
self.min
}
/// Return the total length of the contents in the buffer.
#[inline]
pub fn len(&self) -> usize {
self.end
}
/// Return all free capacity in this buffer.
fn free_buffer(&mut self) -> &mut [u8] {
&mut self.buf[self.end..]
}
/// Refill the contents of this buffer by reading as much as possible into
/// this buffer's free capacity. If no more bytes could be read, then this
/// returns false. Otherwise, this reads until it has filled the buffer
/// past the minimum amount.
pub fn fill<R: io::Read>(&mut self, mut rdr: R) -> io::Result<bool> {
let mut readany = false;
loop {
let readlen = rdr.read(self.free_buffer())?;
if readlen == 0 {
return Ok(readany);
}
readany = true;
self.end += readlen;
if self.len() >= self.min {
return Ok(true);
}
}
}
/// Roll the contents of the buffer so that the suffix of this buffer is
/// moved to the front and all other contents are dropped. The size of the
/// suffix corresponds precisely to the minimum buffer length.
///
/// This should only be called when the entire contents of this buffer have
/// been searched.
pub fn roll(&mut self) {
let roll_start = self
.end
.checked_sub(self.min)
.expect("buffer capacity should be bigger than minimum amount");
let roll_len = self.min;
assert!(roll_start + roll_len <= self.end);
unsafe {
// SAFETY: A buffer contains Copy data, so there's no problem
// moving it around. Safety also depends on our indices being in
// bounds, which they always should be, given the assert above.
ptr::copy(
self.buf[roll_start..].as_ptr(),
self.buf.as_mut_ptr(),
roll_len,
);
}
self.end = roll_len;
}
}
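// NOTE: illustrative sketch, not part of the upstream crate. This test
// reproduces the `foobar` example from the doc comment above: the match is
// split across two reads, and `roll` keeps the last `min` bytes so that the
// second `fill` yields a contiguous `foobar` again.
#[cfg(test)]
mod roll_sketch {
    use super::Buffer;

    #[test]
    fn roll_preserves_possible_match_suffix() {
        // "foobar" is the longest pattern, so the minimum buffer length is 6.
        let mut buf = Buffer::new(6);

        // The first chunk ends in the middle of a potential match.
        assert!(buf.fill(&b"test test foo"[..]).unwrap());
        assert_eq!(buf.buffer(), b"test test foo");

        // Rolling keeps only the last `min` bytes ("st foo") at the front.
        buf.roll();
        assert_eq!(buf.buffer(), b"st foo");

        // Refilling appends the next chunk, restoring a contiguous "foobar".
        assert!(buf.fill(&b"bar test test"[..]).unwrap());
        assert_eq!(buf.buffer(), b"st foobar test test");
    }
}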

View File

@ -0,0 +1,258 @@
pub const BYTE_FREQUENCIES: [u8; 256] = [
55, // '\x00'
52, // '\x01'
51, // '\x02'
50, // '\x03'
49, // '\x04'
48, // '\x05'
47, // '\x06'
46, // '\x07'
45, // '\x08'
103, // '\t'
242, // '\n'
66, // '\x0b'
67, // '\x0c'
229, // '\r'
44, // '\x0e'
43, // '\x0f'
42, // '\x10'
41, // '\x11'
40, // '\x12'
39, // '\x13'
38, // '\x14'
37, // '\x15'
36, // '\x16'
35, // '\x17'
34, // '\x18'
33, // '\x19'
56, // '\x1a'
32, // '\x1b'
31, // '\x1c'
30, // '\x1d'
29, // '\x1e'
28, // '\x1f'
255, // ' '
148, // '!'
164, // '"'
149, // '#'
136, // '$'
160, // '%'
155, // '&'
173, // "'"
221, // '('
222, // ')'
134, // '*'
122, // '+'
232, // ','
202, // '-'
215, // '.'
224, // '/'
208, // '0'
220, // '1'
204, // '2'
187, // '3'
183, // '4'
179, // '5'
177, // '6'
168, // '7'
178, // '8'
200, // '9'
226, // ':'
195, // ';'
154, // '<'
184, // '='
174, // '>'
126, // '?'
120, // '@'
191, // 'A'
157, // 'B'
194, // 'C'
170, // 'D'
189, // 'E'
162, // 'F'
161, // 'G'
150, // 'H'
193, // 'I'
142, // 'J'
137, // 'K'
171, // 'L'
176, // 'M'
185, // 'N'
167, // 'O'
186, // 'P'
112, // 'Q'
175, // 'R'
192, // 'S'
188, // 'T'
156, // 'U'
140, // 'V'
143, // 'W'
123, // 'X'
133, // 'Y'
128, // 'Z'
147, // '['
138, // '\\'
146, // ']'
114, // '^'
223, // '_'
151, // '`'
249, // 'a'
216, // 'b'
238, // 'c'
236, // 'd'
253, // 'e'
227, // 'f'
218, // 'g'
230, // 'h'
247, // 'i'
135, // 'j'
180, // 'k'
241, // 'l'
233, // 'm'
246, // 'n'
244, // 'o'
231, // 'p'
139, // 'q'
245, // 'r'
243, // 's'
251, // 't'
235, // 'u'
201, // 'v'
196, // 'w'
240, // 'x'
214, // 'y'
152, // 'z'
182, // '{'
205, // '|'
181, // '}'
127, // '~'
27, // '\x7f'
212, // '\x80'
211, // '\x81'
210, // '\x82'
213, // '\x83'
228, // '\x84'
197, // '\x85'
169, // '\x86'
159, // '\x87'
131, // '\x88'
172, // '\x89'
105, // '\x8a'
80, // '\x8b'
98, // '\x8c'
96, // '\x8d'
97, // '\x8e'
81, // '\x8f'
207, // '\x90'
145, // '\x91'
116, // '\x92'
115, // '\x93'
144, // '\x94'
130, // '\x95'
153, // '\x96'
121, // '\x97'
107, // '\x98'
132, // '\x99'
109, // '\x9a'
110, // '\x9b'
124, // '\x9c'
111, // '\x9d'
82, // '\x9e'
108, // '\x9f'
118, // '\xa0'
141, // '¡'
113, // '¢'
129, // '£'
119, // '¤'
125, // '¥'
165, // '¦'
117, // '§'
92, // '¨'
106, // '©'
83, // 'ª'
72, // '«'
99, // '¬'
93, // '\xad'
65, // '®'
79, // '¯'
166, // '°'
237, // '±'
163, // '²'
199, // '³'
190, // '´'
225, // 'µ'
209, // '¶'
203, // '·'
198, // '¸'
217, // '¹'
219, // 'º'
206, // '»'
234, // '¼'
248, // '½'
158, // '¾'
239, // '¿'
255, // 'À'
255, // 'Á'
255, // 'Â'
255, // 'Ã'
255, // 'Ä'
255, // 'Å'
255, // 'Æ'
255, // 'Ç'
255, // 'È'
255, // 'É'
255, // 'Ê'
255, // 'Ë'
255, // 'Ì'
255, // 'Í'
255, // 'Î'
255, // 'Ï'
255, // 'Ð'
255, // 'Ñ'
255, // 'Ò'
255, // 'Ó'
255, // 'Ô'
255, // 'Õ'
255, // 'Ö'
255, // '×'
255, // 'Ø'
255, // 'Ù'
255, // 'Ú'
255, // 'Û'
255, // 'Ü'
255, // 'Ý'
255, // 'Þ'
255, // 'ß'
255, // 'à'
255, // 'á'
255, // 'â'
255, // 'ã'
255, // 'ä'
255, // 'å'
255, // 'æ'
255, // 'ç'
255, // 'è'
255, // 'é'
255, // 'ê'
255, // 'ë'
255, // 'ì'
255, // 'í'
255, // 'î'
255, // 'ï'
255, // 'ð'
255, // 'ñ'
255, // 'ò'
255, // 'ó'
255, // 'ô'
255, // 'õ'
255, // 'ö'
255, // '÷'
255, // 'ø'
255, // 'ù'
255, // 'ú'
255, // 'û'
255, // 'ü'
255, // 'ý'
255, // 'þ'
255, // 'ÿ'
];

View File

@ -0,0 +1,238 @@
use std::fmt;
/// A representation of byte oriented equivalence classes.
///
/// This is used in an FSM to reduce the size of the transition table. This can
/// have a particularly large impact not only on the total size of an FSM, but
/// also on compile times.
#[derive(Clone, Copy)]
pub struct ByteClasses([u8; 256]);
impl ByteClasses {
/// Creates a new set of equivalence classes where all bytes are mapped to
/// the same class.
pub fn empty() -> ByteClasses {
ByteClasses([0; 256])
}
/// Creates a new set of equivalence classes where each byte belongs to
/// its own equivalence class.
pub fn singletons() -> ByteClasses {
let mut classes = ByteClasses::empty();
for i in 0..256 {
classes.set(i as u8, i as u8);
}
classes
}
/// Set the equivalence class for the given byte.
#[inline]
pub fn set(&mut self, byte: u8, class: u8) {
self.0[byte as usize] = class;
}
/// Get the equivalence class for the given byte.
#[inline]
pub fn get(&self, byte: u8) -> u8 {
// SAFETY: This is safe because the underlying array always has
// exactly 256 elements, so every u8 value is a valid index.
unsafe { *self.0.get_unchecked(byte as usize) }
}
/// Return the total number of elements in the alphabet represented by
/// these equivalence classes. Equivalently, this returns the total number
/// of equivalence classes.
#[inline]
pub fn alphabet_len(&self) -> usize {
self.0[255] as usize + 1
}
/// Returns true if and only if every byte in this class maps to its own
/// equivalence class. Equivalently, there are 256 equivalence classes
/// and each class contains exactly one byte.
#[inline]
pub fn is_singleton(&self) -> bool {
self.alphabet_len() == 256
}
/// Returns an iterator over a sequence of representative bytes from each
/// equivalence class. Namely, this yields exactly N items, where N is
/// equivalent to the number of equivalence classes. Each item is an
/// arbitrary byte drawn from each equivalence class.
///
/// This is useful when one is determinizing an NFA and the NFA's alphabet
/// hasn't been converted to equivalence classes yet. Picking an arbitrary
/// byte from each equivalence class then permits a full exploration of
/// the NFA instead of using every possible byte value.
pub fn representatives(&self) -> ByteClassRepresentatives {
ByteClassRepresentatives { classes: self, byte: 0, last_class: None }
}
/// Returns all of the bytes in the given equivalence class.
///
/// The second element in the tuple indicates the number of elements in
/// the array.
fn elements(&self, equiv: u8) -> ([u8; 256], usize) {
let (mut array, mut len) = ([0; 256], 0);
for b in 0..256 {
if self.get(b as u8) == equiv {
array[len] = b as u8;
len += 1;
}
}
(array, len)
}
}
impl fmt::Debug for ByteClasses {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.is_singleton() {
write!(f, "ByteClasses({{singletons}})")
} else {
write!(f, "ByteClasses(")?;
for equiv in 0..self.alphabet_len() {
let (members, len) = self.elements(equiv as u8);
write!(f, " {} => {:?}", equiv, &members[..len])?;
}
write!(f, ")")
}
}
}
/// An iterator over representative bytes from each equivalence class.
#[derive(Debug)]
pub struct ByteClassRepresentatives<'a> {
classes: &'a ByteClasses,
byte: usize,
last_class: Option<u8>,
}
impl<'a> Iterator for ByteClassRepresentatives<'a> {
type Item = u8;
fn next(&mut self) -> Option<u8> {
while self.byte < 256 {
let byte = self.byte as u8;
let class = self.classes.get(byte);
self.byte += 1;
if self.last_class != Some(class) {
self.last_class = Some(class);
return Some(byte);
}
}
None
}
}
/// A byte class builder keeps track of an *approximation* of equivalence
/// classes of bytes during NFA construction. That is, every byte in an
/// equivalence class cannot discriminate between a match and a non-match.
///
/// For example, in the literals `abc` and `xyz`, the bytes [\x00-`], [d-w]
/// and [{-\xFF] never discriminate between a match and a non-match, precisely
/// because they never occur in the literals anywhere.
///
/// Note though that this does not necessarily compute the minimal set of
/// equivalence classes. For example, in the literals above, the byte ranges
/// [\x00-`], [d-w] and [{-\xFF] are all treated as distinct equivalence
/// classes even though they could be treated as a single class. The reason for
/// this is implementation complexity. In the future, we should endeavor to
/// compute the minimal equivalence classes since they can have a rather large
/// impact on the size of the DFA.
///
/// The representation here is 256 booleans, all initially set to false. Each
/// boolean maps to its corresponding byte based on position. A `true` value
/// indicates the end of an equivalence class, where its corresponding byte
/// and all of the bytes corresponding to all previous contiguous `false`
/// values are in the same equivalence class.
///
/// This particular representation only permits contiguous ranges of bytes to
/// be in the same equivalence class, which means that we can never discover
/// the true minimal set of equivalence classes.
#[derive(Debug)]
pub struct ByteClassBuilder(Vec<bool>);
impl ByteClassBuilder {
/// Create a new builder of byte classes where all bytes are part of the
/// same equivalence class.
pub fn new() -> ByteClassBuilder {
ByteClassBuilder(vec![false; 256])
}
/// Indicate that the given range of bytes (inclusive) can discriminate a
/// match between it and all other bytes outside of the range.
pub fn set_range(&mut self, start: u8, end: u8) {
debug_assert!(start <= end);
if start > 0 {
self.0[start as usize - 1] = true;
}
self.0[end as usize] = true;
}
/// Build byte classes that map all byte values to their corresponding
/// equivalence class. The last mapping indicates the largest equivalence
/// class identifier (which is never bigger than 255).
pub fn build(&self) -> ByteClasses {
let mut classes = ByteClasses::empty();
let mut class = 0u8;
let mut i = 0;
loop {
classes.set(i as u8, class as u8);
if i >= 255 {
break;
}
if self.0[i] {
class = class.checked_add(1).unwrap();
}
i += 1;
}
classes
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn byte_classes() {
let mut set = ByteClassBuilder::new();
set.set_range(b'a', b'z');
let classes = set.build();
assert_eq!(classes.get(0), 0);
assert_eq!(classes.get(1), 0);
assert_eq!(classes.get(2), 0);
assert_eq!(classes.get(b'a' - 1), 0);
assert_eq!(classes.get(b'a'), 1);
assert_eq!(classes.get(b'm'), 1);
assert_eq!(classes.get(b'z'), 1);
assert_eq!(classes.get(b'z' + 1), 2);
assert_eq!(classes.get(254), 2);
assert_eq!(classes.get(255), 2);
let mut set = ByteClassBuilder::new();
set.set_range(0, 2);
set.set_range(4, 6);
let classes = set.build();
assert_eq!(classes.get(0), 0);
assert_eq!(classes.get(1), 0);
assert_eq!(classes.get(2), 0);
assert_eq!(classes.get(3), 1);
assert_eq!(classes.get(4), 2);
assert_eq!(classes.get(5), 2);
assert_eq!(classes.get(6), 2);
assert_eq!(classes.get(7), 3);
assert_eq!(classes.get(255), 3);
}
#[test]
fn full_byte_classes() {
let mut set = ByteClassBuilder::new();
for i in 0..256u16 {
set.set_range(i as u8, i as u8);
}
assert_eq!(set.build().alphabet_len(), 256);
}
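    // NOTE: illustrative sketch, not part of the upstream crate. This test
    // demonstrates the `representatives` iterator described above: it yields
    // exactly one arbitrary byte per equivalence class.
    #[test]
    fn representatives_sketch() {
        let mut set = ByteClassBuilder::new();
        set.set_range(b'a', b'z');
        let classes = set.build();
        // Three classes: [\x00-`], [a-z] and [{-\xFF].
        assert_eq!(classes.alphabet_len(), 3);
        let reps: Vec<u8> = classes.representatives().collect();
        assert_eq!(reps, vec![0, b'a', b'z' + 1]);
    }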
}

View File

@ -0,0 +1,709 @@
use std::mem::size_of;
use ahocorasick::MatchKind;
use automaton::Automaton;
use classes::ByteClasses;
use error::Result;
use nfa::{PatternID, PatternLength, NFA};
use prefilter::{Prefilter, PrefilterObj, PrefilterState};
use state_id::{dead_id, fail_id, premultiply_overflow_error, StateID};
use Match;
#[derive(Clone, Debug)]
pub enum DFA<S> {
Standard(Standard<S>),
ByteClass(ByteClass<S>),
Premultiplied(Premultiplied<S>),
PremultipliedByteClass(PremultipliedByteClass<S>),
}
impl<S: StateID> DFA<S> {
fn repr(&self) -> &Repr<S> {
match *self {
DFA::Standard(ref dfa) => dfa.repr(),
DFA::ByteClass(ref dfa) => dfa.repr(),
DFA::Premultiplied(ref dfa) => dfa.repr(),
DFA::PremultipliedByteClass(ref dfa) => dfa.repr(),
}
}
pub fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
pub fn heap_bytes(&self) -> usize {
self.repr().heap_bytes
}
pub fn max_pattern_len(&self) -> usize {
self.repr().max_pattern_len
}
pub fn pattern_count(&self) -> usize {
self.repr().pattern_count
}
pub fn start_state(&self) -> S {
self.repr().start_id
}
#[inline(always)]
pub fn overlapping_find_at(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
state_id: &mut S,
match_index: &mut usize,
) -> Option<Match> {
match *self {
DFA::Standard(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::ByteClass(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
}
}
#[inline(always)]
pub fn earliest_find_at(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
state_id: &mut S,
) -> Option<Match> {
match *self {
DFA::Standard(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
DFA::ByteClass(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
DFA::Premultiplied(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
DFA::PremultipliedByteClass(ref dfa) => {
dfa.earliest_find_at(prestate, haystack, at, state_id)
}
}
}
#[inline(always)]
pub fn find_at_no_state(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Option<Match> {
match *self {
DFA::Standard(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
DFA::ByteClass(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
DFA::Premultiplied(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
DFA::PremultipliedByteClass(ref dfa) => {
dfa.find_at_no_state(prestate, haystack, at)
}
}
}
}
#[derive(Clone, Debug)]
pub struct Standard<S>(Repr<S>);
impl<S: StateID> Standard<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for Standard<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
id.to_usize() < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
self.repr().get_match(id, match_index, end)
}
fn match_count(&self, id: S) -> usize {
self.repr().match_count(id)
}
unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
let o = current.to_usize() * 256 + input as usize;
*self.repr().trans.get_unchecked(o)
}
}
#[derive(Clone, Debug)]
pub struct ByteClass<S>(Repr<S>);
impl<S: StateID> ByteClass<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for ByteClass<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
id.to_usize() < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
self.repr().get_match(id, match_index, end)
}
fn match_count(&self, id: S) -> usize {
self.repr().match_count(id)
}
unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
let alphabet_len = self.repr().byte_classes.alphabet_len();
let input = self.repr().byte_classes.get(input);
let o = current.to_usize() * alphabet_len + input as usize;
*self.repr().trans.get_unchecked(o)
}
}
#[derive(Clone, Debug)]
pub struct Premultiplied<S>(Repr<S>);
impl<S: StateID> Premultiplied<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for Premultiplied<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
(id.to_usize() / 256) < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
if id > self.repr().max_match {
return None;
}
self.repr()
.matches
.get(id.to_usize() / 256)
.and_then(|m| m.get(match_index))
.map(|&(id, len)| Match { pattern: id, len, end })
}
fn match_count(&self, id: S) -> usize {
let o = id.to_usize() / 256;
self.repr().matches[o].len()
}
unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
let o = current.to_usize() + input as usize;
*self.repr().trans.get_unchecked(o)
}
}
#[derive(Clone, Debug)]
pub struct PremultipliedByteClass<S>(Repr<S>);
impl<S: StateID> PremultipliedByteClass<S> {
fn repr(&self) -> &Repr<S> {
&self.0
}
}
impl<S: StateID> Automaton for PremultipliedByteClass<S> {
type ID = S;
fn match_kind(&self) -> &MatchKind {
&self.repr().match_kind
}
fn anchored(&self) -> bool {
self.repr().anchored
}
fn prefilter(&self) -> Option<&dyn Prefilter> {
self.repr().prefilter.as_ref().map(|p| p.as_ref())
}
fn start_state(&self) -> S {
self.repr().start_id
}
fn is_valid(&self, id: S) -> bool {
(id.to_usize() / self.repr().alphabet_len()) < self.repr().state_count
}
fn is_match_state(&self, id: S) -> bool {
self.repr().is_match_state(id)
}
fn is_match_or_dead_state(&self, id: S) -> bool {
self.repr().is_match_or_dead_state(id)
}
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
if id > self.repr().max_match {
return None;
}
self.repr()
.matches
.get(id.to_usize() / self.repr().alphabet_len())
.and_then(|m| m.get(match_index))
.map(|&(id, len)| Match { pattern: id, len, end })
}
fn match_count(&self, id: S) -> usize {
let o = id.to_usize() / self.repr().alphabet_len();
self.repr().matches[o].len()
}
unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
let input = self.repr().byte_classes.get(input);
let o = current.to_usize() + input as usize;
*self.repr().trans.get_unchecked(o)
}
}
#[derive(Clone, Debug)]
pub struct Repr<S> {
match_kind: MatchKind,
anchored: bool,
premultiplied: bool,
start_id: S,
/// The length, in bytes, of the longest pattern in this automaton. This
/// information is useful for keeping correct buffer sizes when searching
/// on streams.
max_pattern_len: usize,
/// The total number of patterns added to this automaton. This includes
/// patterns that may never match.
pattern_count: usize,
state_count: usize,
max_match: S,
/// The number of bytes of heap used by this DFA's transition table.
heap_bytes: usize,
/// A prefilter for quickly detecting candidate matches, if pertinent.
prefilter: Option<PrefilterObj>,
byte_classes: ByteClasses,
trans: Vec<S>,
matches: Vec<Vec<(PatternID, PatternLength)>>,
}
impl<S: StateID> Repr<S> {
/// Returns the total alphabet size for this DFA.
///
/// If byte classes are enabled, then this corresponds to the number of
/// equivalence classes. If they are disabled, then this is always 256.
fn alphabet_len(&self) -> usize {
self.byte_classes.alphabet_len()
}
/// Returns true only if the given state is a match state.
fn is_match_state(&self, id: S) -> bool {
id <= self.max_match && id > dead_id()
}
/// Returns true only if the given state is either a dead state or a match
/// state.
fn is_match_or_dead_state(&self, id: S) -> bool {
id <= self.max_match
}
/// Get the ith match for the given state, where the end position of a
/// match was found at `end`.
///
/// # Panics
///
/// The caller must ensure that the given state identifier is valid,
/// otherwise this may panic. The `match_index` need not be valid. That is,
/// if the given state has no matches then this returns `None`.
fn get_match(
&self,
id: S,
match_index: usize,
end: usize,
) -> Option<Match> {
if id > self.max_match {
return None;
}
self.matches
.get(id.to_usize())
.and_then(|m| m.get(match_index))
.map(|&(id, len)| Match { pattern: id, len, end })
}
/// Return the total number of matches for the given state.
///
/// # Panics
///
/// The caller must ensure that the given identifier is valid, or else
/// this panics.
fn match_count(&self, id: S) -> usize {
self.matches[id.to_usize()].len()
}
/// Get the next state given `from` as the current state and `byte` as the
/// current input byte.
fn next_state(&self, from: S, byte: u8) -> S {
let alphabet_len = self.alphabet_len();
let byte = self.byte_classes.get(byte);
self.trans[from.to_usize() * alphabet_len + byte as usize]
}
/// Set the `byte` transition for the `from` state to point to `to`.
fn set_next_state(&mut self, from: S, byte: u8, to: S) {
let alphabet_len = self.alphabet_len();
let byte = self.byte_classes.get(byte);
self.trans[from.to_usize() * alphabet_len + byte as usize] = to;
}
/// Swap the given states in place.
fn swap_states(&mut self, id1: S, id2: S) {
assert!(!self.premultiplied, "can't swap states in premultiplied DFA");
let o1 = id1.to_usize() * self.alphabet_len();
let o2 = id2.to_usize() * self.alphabet_len();
for b in 0..self.alphabet_len() {
self.trans.swap(o1 + b, o2 + b);
}
self.matches.swap(id1.to_usize(), id2.to_usize());
}
/// This routine shuffles all match states in this DFA to the beginning
/// of the DFA such that every non-match state appears after every match
/// state. (With one exception: the special fail and dead states remain as
/// the first two states.)
///
/// The purpose of doing this shuffling is to avoid an extra conditional
/// in the search loop. In particular, whether a state is a match state or
/// not can be determined by comparing its ID against `max_match`, without
/// accessing any additional memory.
///
/// This updates `self.max_match` to point to the last matching state as
/// well as `self.start_id` if the starting state was moved.
fn shuffle_match_states(&mut self) {
assert!(
!self.premultiplied,
"cannot shuffle match states of premultiplied DFA"
);
if self.state_count <= 1 {
return;
}
let mut first_non_match = self.start_id.to_usize();
while first_non_match < self.state_count
&& self.matches[first_non_match].len() > 0
{
first_non_match += 1;
}
let mut swaps: Vec<S> = vec![fail_id(); self.state_count];
let mut cur = self.state_count - 1;
while cur > first_non_match {
if self.matches[cur].len() > 0 {
self.swap_states(
S::from_usize(cur),
S::from_usize(first_non_match),
);
swaps[cur] = S::from_usize(first_non_match);
swaps[first_non_match] = S::from_usize(cur);
first_non_match += 1;
while first_non_match < cur
&& self.matches[first_non_match].len() > 0
{
first_non_match += 1;
}
}
cur -= 1;
}
for id in (0..self.state_count).map(S::from_usize) {
let alphabet_len = self.alphabet_len();
let offset = id.to_usize() * alphabet_len;
for next in &mut self.trans[offset..offset + alphabet_len] {
if swaps[next.to_usize()] != fail_id() {
*next = swaps[next.to_usize()];
}
}
}
if swaps[self.start_id.to_usize()] != fail_id() {
self.start_id = swaps[self.start_id.to_usize()];
}
self.max_match = S::from_usize(first_non_match - 1);
}
fn premultiply(&mut self) -> Result<()> {
if self.premultiplied || self.state_count <= 1 {
return Ok(());
}
let alpha_len = self.alphabet_len();
premultiply_overflow_error(
S::from_usize(self.state_count - 1),
alpha_len,
)?;
for id in (2..self.state_count).map(S::from_usize) {
let offset = id.to_usize() * alpha_len;
for next in &mut self.trans[offset..offset + alpha_len] {
if *next == dead_id() {
continue;
}
*next = S::from_usize(next.to_usize() * alpha_len);
}
}
self.premultiplied = true;
self.start_id = S::from_usize(self.start_id.to_usize() * alpha_len);
self.max_match = S::from_usize(self.max_match.to_usize() * alpha_len);
Ok(())
}
/// Computes the total amount of heap used by this DFA in bytes.
fn calculate_size(&mut self) {
let mut size = (self.trans.len() * size_of::<S>())
+ (self.matches.len()
* size_of::<Vec<(PatternID, PatternLength)>>());
for state_matches in &self.matches {
size +=
state_matches.len() * size_of::<(PatternID, PatternLength)>();
}
size += self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes());
self.heap_bytes = size;
}
}
/// A builder for configuring the determinization of an NFA into a DFA.
#[derive(Clone, Debug)]
pub struct Builder {
premultiply: bool,
byte_classes: bool,
}
impl Builder {
/// Create a new builder for a DFA.
pub fn new() -> Builder {
Builder { premultiply: true, byte_classes: true }
}
/// Build a DFA from the given NFA.
///
/// This returns an error if the state identifiers exceed their
/// representation size. This can only happen when state ids are
/// premultiplied (which is enabled by default).
pub fn build<S: StateID>(&self, nfa: &NFA<S>) -> Result<DFA<S>> {
let byte_classes = if self.byte_classes {
nfa.byte_classes().clone()
} else {
ByteClasses::singletons()
};
let alphabet_len = byte_classes.alphabet_len();
let trans = vec![fail_id(); alphabet_len * nfa.state_len()];
let matches = vec![vec![]; nfa.state_len()];
let mut repr = Repr {
match_kind: nfa.match_kind().clone(),
anchored: nfa.anchored(),
premultiplied: false,
start_id: nfa.start_state(),
max_pattern_len: nfa.max_pattern_len(),
pattern_count: nfa.pattern_count(),
state_count: nfa.state_len(),
max_match: fail_id(),
heap_bytes: 0,
prefilter: nfa.prefilter_obj().map(|p| p.clone()),
byte_classes: byte_classes.clone(),
trans: trans,
matches: matches,
};
for id in (0..nfa.state_len()).map(S::from_usize) {
repr.matches[id.to_usize()].extend_from_slice(nfa.matches(id));
let fail = nfa.failure_transition(id);
nfa.iter_all_transitions(&byte_classes, id, |b, mut next| {
if next == fail_id() {
next = nfa_next_state_memoized(nfa, &repr, id, fail, b);
}
repr.set_next_state(id, b, next);
});
}
repr.shuffle_match_states();
repr.calculate_size();
if self.premultiply {
repr.premultiply()?;
if byte_classes.is_singleton() {
Ok(DFA::Premultiplied(Premultiplied(repr)))
} else {
Ok(DFA::PremultipliedByteClass(PremultipliedByteClass(repr)))
}
} else {
if byte_classes.is_singleton() {
Ok(DFA::Standard(Standard(repr)))
} else {
Ok(DFA::ByteClass(ByteClass(repr)))
}
}
}
/// Whether to use byte classes or not in the DFA.
pub fn byte_classes(&mut self, yes: bool) -> &mut Builder {
self.byte_classes = yes;
self
}
/// Whether to premultiply state identifiers in the DFA.
pub fn premultiply(&mut self, yes: bool) -> &mut Builder {
self.premultiply = yes;
self
}
}
/// This returns the next NFA transition (including resolving failure
/// transitions), except that once it sees a state ID less than the ID of the
/// DFA state currently being populated, it stops following failure
/// transitions and instead queries the pre-computed transition from the DFA
/// itself.
///
/// In general, this should only be called when a failure transition is seen.
fn nfa_next_state_memoized<S: StateID>(
nfa: &NFA<S>,
dfa: &Repr<S>,
populating: S,
mut current: S,
input: u8,
) -> S {
loop {
if current < populating {
return dfa.next_state(current, input);
}
let next = nfa.next_state(current, input);
if next != fail_id() {
return next;
}
current = nfa.failure_transition(current);
}
}
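// NOTE: illustrative sketch, not part of the upstream crate. Premultiplying
// stores `state * alphabet_len` as a state's identifier, which turns the
// transition lookup `state * alphabet_len + class` into a single addition in
// the search loop. The toy table below demonstrates the transform.
#[cfg(test)]
mod premultiply_sketch {
    #[test]
    fn premultiplied_lookup_matches_standard_lookup() {
        let alpha = 2;
        // A standard table for 3 states over a 2-class alphabet: state 0 is
        // dead, state 1 goes to state 2 on class 0, state 2 goes to state 1.
        let std_trans = vec![
            0, 0, // state 0 (dead)
            2, 0, // state 1
            1, 0, // state 2
        ];
        // Premultiplication multiplies every stored state ID by `alpha`.
        let pre_trans: Vec<usize> =
            std_trans.iter().map(|&next| next * alpha).collect();
        let (state, class) = (1, 0);
        // Standard lookup: one multiply and one add per transition.
        let std_next = std_trans[state * alpha + class];
        // Premultiplied lookup: the current ID is already `state * alpha`,
        // so only a single addition is needed at search time.
        let pre_state = state * alpha;
        let pre_next = pre_trans[pre_state + class];
        assert_eq!(std_next, 2);
        // The result is itself a premultiplied ID, ready for the next step.
        assert_eq!(pre_next, std_next * alpha);
    }
}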

View File

@ -0,0 +1,101 @@
use std::error;
use std::fmt;
use std::result;
pub type Result<T> = result::Result<T, Error>;
/// An error that occurred during the construction of an Aho-Corasick
/// automaton.
#[derive(Clone, Debug)]
pub struct Error {
kind: ErrorKind,
}
/// The kind of error that occurred.
#[derive(Clone, Debug)]
pub enum ErrorKind {
/// An error that occurs when constructing an automaton would require the
/// use of a state ID that overflows the chosen state ID representation.
/// For example, if one is using `u8` for state IDs and builds a DFA with
/// 257 states, then the last state's ID will be `256` which cannot be
/// represented with `u8`.
StateIDOverflow {
/// The maximum possible state ID.
max: usize,
},
/// An error that occurs when premultiplication of state IDs is requested
/// when constructing an Aho-Corasick DFA, but doing so would overflow the
/// chosen state ID representation.
///
/// When `max == requested_max`, then the state ID would overflow `usize`.
PremultiplyOverflow {
/// The maximum possible state id.
max: usize,
/// The maximum ID required by premultiplication.
requested_max: usize,
},
}
impl Error {
/// Return the kind of this error.
pub fn kind(&self) -> &ErrorKind {
&self.kind
}
pub(crate) fn state_id_overflow(max: usize) -> Error {
Error { kind: ErrorKind::StateIDOverflow { max } }
}
pub(crate) fn premultiply_overflow(
max: usize,
requested_max: usize,
) -> Error {
Error { kind: ErrorKind::PremultiplyOverflow { max, requested_max } }
}
}
impl error::Error for Error {
fn description(&self) -> &str {
match self.kind {
ErrorKind::StateIDOverflow { .. } => {
"state id representation too small"
}
ErrorKind::PremultiplyOverflow { .. } => {
"state id representation too small for premultiplication"
}
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.kind {
ErrorKind::StateIDOverflow { max } => write!(
f,
"building the automaton failed because it required \
building more states than can be identified, where the \
maximum ID for the chosen representation is {}",
max,
),
ErrorKind::PremultiplyOverflow { max, requested_max } => {
if max == requested_max {
write!(
f,
"premultiplication of states requires the ability to \
represent a state ID greater than what can fit on \
this platform's usize, which is {}",
::std::usize::MAX,
)
} else {
write!(
f,
"premultiplication of states requires the ability to \
represent at least a state ID of {}, but the chosen \
representation only permits a maximum state ID of {}",
requested_max, max,
)
}
}
}
}
}

View File

@ -0,0 +1,297 @@
/*!
A library for finding occurrences of many patterns at once. This library
provides multiple pattern search principally through an implementation of the
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
which builds a fast finite state machine for executing searches in linear time.
Additionally, this library provides a number of configuration options for
building the automaton that permit controlling the space versus time trade
off. Other features include simple ASCII case insensitive matching, finding
overlapping matches, replacements, searching streams and even searching and
replacing text in streams.
Finally, unlike all other (known) Aho-Corasick implementations, this one
supports enabling
[leftmost-first](enum.MatchKind.html#variant.LeftmostFirst)
or
[leftmost-longest](enum.MatchKind.html#variant.LeftmostLongest)
match semantics, using a (seemingly) novel alternative construction algorithm.
For more details on what match semantics means, see the
[`MatchKind`](enum.MatchKind.html)
type.
# Overview
This section gives a brief overview of the primary types in this crate:
* [`AhoCorasick`](struct.AhoCorasick.html) is the primary type and represents
an Aho-Corasick automaton. This is the type you use to execute searches.
* [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) can be used to build
an Aho-Corasick automaton, and supports configuring a number of options.
* [`Match`](struct.Match.html) represents a single match reported by an
Aho-Corasick automaton. Each match has two pieces of information: the pattern
that matched and the start and end byte offsets corresponding to the position
in the haystack at which it matched.
Additionally, the [`packed`](packed/index.html) sub-module contains a lower
level API for using fast vectorized routines for finding a small number of
patterns in a haystack.
# Example: basic searching
This example shows how to search for occurrences of multiple patterns
simultaneously. Each match includes the pattern that matched along with the
byte offsets of the match.
```
use aho_corasick::AhoCorasick;
let patterns = &["apple", "maple", "Snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasick::new(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
# Example: case insensitivity
This is like the previous example, but matches `Snapple` case insensitively
using `AhoCorasickBuilder`:
```
use aho_corasick::AhoCorasickBuilder;
let patterns = &["apple", "maple", "snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
.build(patterns);
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(1, 13, 18),
(0, 28, 33),
(2, 43, 50),
]);
```
# Example: replacing matches in a stream
This example shows how to execute a search and replace on a stream without
loading the entire stream into memory first.
```
use aho_corasick::AhoCorasick;
# fn example() -> Result<(), ::std::io::Error> {
let patterns = &["fox", "brown", "quick"];
let replace_with = &["sloth", "grey", "slow"];
// In a real example, these might be `std::fs::File`s instead. All you need to
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
let rdr = "The quick brown fox.";
let mut wtr = vec![];
let ac = AhoCorasick::new(patterns);
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
# Ok(()) }; example().unwrap()
```
# Example: finding the leftmost first match
In the textbook description of Aho-Corasick, its formulation is typically
structured such that it reports all possible matches, even when they overlap
with another. In many cases, overlapping matches may not be desired, such as
the case of finding all successive non-overlapping matches like you might with
a standard regular expression.
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
this doesn't always work in the expected way, since it will report matches as
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
against the text `Samwise`. Most regex engines (that are Perl-like, or
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
algorithm modified for reporting non-overlapping matches will report `Sam`.
A novel contribution of this library is the ability to change the match
semantics of Aho-Corasick (without additional search time overhead) such that
`Samwise` is reported instead. For example, here's the standard approach:
```
use aho_corasick::AhoCorasick;
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasick::new(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
```
And now here's the leftmost-first version, which matches how a Perl-like
regex will work:
```
use aho_corasick::{AhoCorasickBuilder, MatchKind};
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.build(patterns);
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
```
In addition to leftmost-first semantics, this library also supports
leftmost-longest semantics, which match the POSIX behavior of a regular
expression alternation. See
[`MatchKind`](enum.MatchKind.html)
for more details.
# Prefilters
While an Aho-Corasick automaton can perform admirably when compared to more
naive solutions, it is generally slower than more specialized algorithms that
are accelerated using vector instructions such as SIMD.
For that reason, this library will internally use a "prefilter" to attempt
to accelerate searches when possible. Currently, this library has a fairly
limited implementation that only applies when there are 3 or fewer unique
starting bytes among all patterns in an automaton.
While a prefilter is generally good to have on by default since it works well
in the common case, it can lead to less predictable or even sub-optimal
performance in some cases. For that reason, prefilters can be disabled via
[`AhoCorasickBuilder::prefilter`](struct.AhoCorasickBuilder.html#method.prefilter).
*/
#![deny(missing_docs)]
// We can never be truly no_std, but we could be alloc-only some day, so
// require the std feature for now.
#[cfg(not(feature = "std"))]
compile_error!("`std` feature is currently required to build this crate");
extern crate memchr;
#[cfg(test)]
#[macro_use]
extern crate doc_comment;
#[cfg(test)]
doctest!("../README.md");
pub use ahocorasick::{
AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind,
StreamFindIter,
};
pub use error::{Error, ErrorKind};
pub use state_id::StateID;
mod ahocorasick;
mod automaton;
mod buffer;
mod byte_frequencies;
mod classes;
mod dfa;
mod error;
mod nfa;
pub mod packed;
mod prefilter;
mod state_id;
#[cfg(test)]
mod tests;
/// A representation of a match reported by an Aho-Corasick automaton.
///
/// A match has two essential pieces of information: the identifier of the
/// pattern that matched, along with the start and end offsets of the match
/// in the haystack.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use aho_corasick::AhoCorasick;
///
/// let ac = AhoCorasick::new(&[
/// "foo", "bar", "baz",
/// ]);
/// let mat = ac.find("xxx bar xxx").expect("should have a match");
/// assert_eq!(1, mat.pattern());
/// assert_eq!(4, mat.start());
/// assert_eq!(7, mat.end());
/// ```
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Match {
/// The pattern id.
pattern: usize,
/// The length of this match, such that the starting position of the match
/// is `end - len`.
///
/// We use length here because, other than the pattern id, the only
/// information about each pattern that the automaton stores is its length.
/// So using the length here is just a bit more natural. But it isn't
/// technically required.
len: usize,
/// The end offset of the match, exclusive.
end: usize,
}
impl Match {
/// Returns the identifier of the pattern that matched.
///
/// The identifier of a pattern is derived from the position in which it
/// was originally inserted into the corresponding automaton. The first
/// pattern has identifier `0`, and each subsequent pattern is `1`, `2`
/// and so on.
#[inline]
pub fn pattern(&self) -> usize {
self.pattern
}
/// The starting position of the match.
#[inline]
pub fn start(&self) -> usize {
self.end - self.len
}
/// The ending position of the match.
#[inline]
pub fn end(&self) -> usize {
self.end
}
/// Returns true if and only if this match is empty. That is, when
/// `start() == end()`.
///
/// An empty match can only be returned when the empty string was among
/// the patterns used to build the Aho-Corasick automaton.
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}
#[inline]
fn increment(&self, by: usize) -> Match {
Match { pattern: self.pattern, len: self.len, end: self.end + by }
}
#[inline]
fn from_span(id: usize, start: usize, end: usize) -> Match {
Match { pattern: id, len: end - start, end: end }
}
}
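// NOTE: illustrative sketch, not part of the upstream crate. It demonstrates
// the (pattern, len, end) representation described above: the start offset
// is always derived as `end - len`.
#[cfg(test)]
mod match_repr_sketch {
    use super::Match;

    #[test]
    fn span_round_trips_through_len_and_end() {
        let m = Match::from_span(1, 4, 7);
        assert_eq!(1, m.pattern());
        assert_eq!(4, m.start());
        assert_eq!(7, m.end());
        assert!(!m.is_empty());
        // Shifting a match forward preserves its length.
        let shifted = m.increment(3);
        assert_eq!(7, shifted.start());
        assert_eq!(10, shifted.end());
    }
}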

File diff suppressed because it is too large

View File

@ -0,0 +1,632 @@
use std::u16;
use packed::pattern::Patterns;
use packed::rabinkarp::RabinKarp;
use packed::teddy::{self, Teddy};
use Match;
/// This is a limit placed on the total number of patterns we're willing to try
/// and match at once. As more sophisticated algorithms are added, this number
/// may be increased.
const PATTERN_LIMIT: usize = 128;
/// A knob for controlling the match semantics of a packed multiple string
/// searcher.
///
/// This differs from the
/// [`MatchKind`](../enum.MatchKind.html)
/// type in the top-level crate module in that it doesn't support
/// "standard" match semantics, and instead only supports leftmost-first or
/// leftmost-longest. Namely, "standard" semantics cannot be easily supported
/// by packed searchers.
///
/// For more information on the distinction between leftmost-first and
/// leftmost-longest, see the docs on the top-level `MatchKind` type.
///
/// Unlike the top-level `MatchKind` type, the default match semantics for this
/// type are leftmost-first.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MatchKind {
/// Use leftmost-first match semantics, which reports leftmost matches.
/// When there are multiple possible leftmost matches, the match
/// corresponding to the pattern that appeared earlier when constructing
/// the automaton is reported.
///
/// This is the default.
LeftmostFirst,
/// Use leftmost-longest match semantics, which reports leftmost matches.
/// When there are multiple possible leftmost matches, the longest match
/// is chosen.
LeftmostLongest,
/// Hints that destructuring should not be exhaustive.
///
/// This enum may grow additional variants, so this makes sure clients
/// don't count on exhaustive matching. (Otherwise, adding a new variant
/// could break existing code.)
#[doc(hidden)]
__Nonexhaustive,
}
impl Default for MatchKind {
fn default() -> MatchKind {
MatchKind::LeftmostFirst
}
}
/// The configuration for a packed multiple pattern searcher.
///
/// The configuration is currently limited only to being able to select the
/// match semantics (leftmost-first or leftmost-longest) of a searcher. In the
/// future, more knobs may be made available.
///
/// A configuration produces a [`packed::Builder`](struct.Builder.html), which
/// in turn can be used to construct a
/// [`packed::Searcher`](struct.Searcher.html) for searching.
///
/// # Example
///
/// This example shows how to use leftmost-longest semantics instead of the
/// default (leftmost-first).
///
/// ```
/// use aho_corasick::packed::{Config, MatchKind};
///
/// # fn example() -> Option<()> {
/// let searcher = Config::new()
/// .match_kind(MatchKind::LeftmostLongest)
/// .builder()
/// .add("foo")
/// .add("foobar")
/// .build()?;
/// let matches: Vec<usize> = searcher
/// .find_iter("foobar")
/// .map(|mat| mat.pattern())
/// .collect();
/// assert_eq!(vec![1], matches);
/// # Some(()) }
/// # if cfg!(target_arch = "x86_64") {
/// # example().unwrap()
/// # } else {
/// # assert!(example().is_none());
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct Config {
kind: MatchKind,
force: Option<ForceAlgorithm>,
force_teddy_fat: Option<bool>,
force_avx: Option<bool>,
}
/// An internal option for forcing the use of a particular packed algorithm.
///
/// When an algorithm is forced, if a searcher could not be constructed for it,
/// then no searcher will be returned even if an alternative algorithm would
/// work.
#[derive(Clone, Debug)]
enum ForceAlgorithm {
Teddy,
RabinKarp,
}
impl Default for Config {
fn default() -> Config {
Config::new()
}
}
impl Config {
/// Create a new default configuration. A default configuration uses
/// leftmost-first match semantics.
pub fn new() -> Config {
Config {
kind: MatchKind::LeftmostFirst,
force: None,
force_teddy_fat: None,
force_avx: None,
}
}
/// Create a packed builder from this configuration. The builder can be
/// used to accumulate patterns and create a
/// [`Searcher`](struct.Searcher.html)
/// from them.
pub fn builder(&self) -> Builder {
Builder::from_config(self.clone())
}
/// Set the match semantics for this configuration.
pub fn match_kind(&mut self, kind: MatchKind) -> &mut Config {
self.kind = kind;
self
}
/// An undocumented method for forcing the use of the Teddy algorithm.
///
/// This is only exposed for more precise testing and benchmarks. Callers
/// should not use it as it is not part of the API stability guarantees of
/// this crate.
#[doc(hidden)]
pub fn force_teddy(&mut self, yes: bool) -> &mut Config {
if yes {
self.force = Some(ForceAlgorithm::Teddy);
} else {
self.force = None;
}
self
}
/// An undocumented method for forcing the use of the Fat Teddy algorithm.
///
/// This is only exposed for more precise testing and benchmarks. Callers
/// should not use it as it is not part of the API stability guarantees of
/// this crate.
#[doc(hidden)]
pub fn force_teddy_fat(&mut self, yes: Option<bool>) -> &mut Config {
self.force_teddy_fat = yes;
self
}
/// An undocumented method for forcing the use of SSE (`Some(false)`) or
/// AVX (`Some(true)`) algorithms.
///
/// This is only exposed for more precise testing and benchmarks. Callers
/// should not use it as it is not part of the API stability guarantees of
/// this crate.
#[doc(hidden)]
pub fn force_avx(&mut self, yes: Option<bool>) -> &mut Config {
self.force_avx = yes;
self
}
/// An undocumented method for forcing the use of the Rabin-Karp algorithm.
///
/// This is only exposed for more precise testing and benchmarks. Callers
/// should not use it as it is not part of the API stability guarantees of
/// this crate.
#[doc(hidden)]
pub fn force_rabin_karp(&mut self, yes: bool) -> &mut Config {
if yes {
self.force = Some(ForceAlgorithm::RabinKarp);
} else {
self.force = None;
}
self
}
}
/// A builder for constructing a packed searcher from a collection of patterns.
///
/// # Example
///
/// This example shows how to use a builder to construct a searcher. By
/// default, leftmost-first match semantics are used.
///
/// ```
/// use aho_corasick::packed::{Builder, MatchKind};
///
/// # fn example() -> Option<()> {
/// let searcher = Builder::new()
/// .add("foobar")
/// .add("foo")
/// .build()?;
/// let matches: Vec<usize> = searcher
/// .find_iter("foobar")
/// .map(|mat| mat.pattern())
/// .collect();
/// assert_eq!(vec![0], matches);
/// # Some(()) }
/// # if cfg!(target_arch = "x86_64") {
/// # example().unwrap()
/// # } else {
/// # assert!(example().is_none());
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct Builder {
/// The configuration of this builder and subsequent matcher.
config: Config,
/// Set to true if the builder detects that a matcher cannot be built.
inert: bool,
/// The patterns provided by the caller.
patterns: Patterns,
}
impl Builder {
/// Create a new builder for constructing a multi-pattern searcher. This
/// constructor uses the default configuration.
pub fn new() -> Builder {
Builder::from_config(Config::new())
}
fn from_config(config: Config) -> Builder {
Builder { config, inert: false, patterns: Patterns::new() }
}
/// Build a searcher from the patterns added to this builder so far.
pub fn build(&self) -> Option<Searcher> {
if self.inert || self.patterns.is_empty() {
return None;
}
let mut patterns = self.patterns.clone();
patterns.set_match_kind(self.config.kind);
let rabinkarp = RabinKarp::new(&patterns);
// Effectively, we only want to return a searcher if we can use Teddy,
// since Teddy is our only fast packed searcher at the moment.
// Rabin-Karp is only used when searching haystacks smaller than what
// Teddy can support. Thus, the only way to get a Rabin-Karp searcher
// is to force it using undocumented APIs (for tests/benchmarks).
let (search_kind, minimum_len) = match self.config.force {
None | Some(ForceAlgorithm::Teddy) => {
let teddy = match self.build_teddy(&patterns) {
None => return None,
Some(teddy) => teddy,
};
let minimum_len = teddy.minimum_len();
(SearchKind::Teddy(teddy), minimum_len)
}
Some(ForceAlgorithm::RabinKarp) => (SearchKind::RabinKarp, 0),
};
Some(Searcher {
config: self.config.clone(),
patterns: patterns,
rabinkarp: rabinkarp,
search_kind,
minimum_len,
})
}
fn build_teddy(&self, patterns: &Patterns) -> Option<Teddy> {
teddy::Builder::new()
.avx(self.config.force_avx)
.fat(self.config.force_teddy_fat)
.build(&patterns)
}
/// Add the given pattern to this set to match.
///
/// The order in which patterns are added is significant. Namely, when
/// using leftmost-first match semantics, then when multiple patterns can
/// match at a particular location, the pattern that was added first is
/// used as the match.
///
/// If the number of patterns added exceeds the amount supported by packed
/// searchers, then the builder will stop accumulating patterns and render
/// itself inert. At this point, constructing a searcher will always return
/// `None`.
pub fn add<P: AsRef<[u8]>>(&mut self, pattern: P) -> &mut Builder {
if self.inert {
return self;
} else if self.patterns.len() >= PATTERN_LIMIT {
self.inert = true;
self.patterns.reset();
return self;
}
// Just in case PATTERN_LIMIT increases beyond u16::MAX.
assert!(self.patterns.len() <= u16::MAX as usize);
let pattern = pattern.as_ref();
if pattern.is_empty() {
self.inert = true;
self.patterns.reset();
return self;
}
self.patterns.add(pattern);
self
}
/// Add the given iterator of patterns to this set to match.
///
/// The iterator must yield elements that can be converted into a `&[u8]`.
///
/// The order in which patterns are added is significant. Namely, when
/// using leftmost-first match semantics, then when multiple patterns can
/// match at a particular location, the pattern that was added first is
/// used as the match.
///
/// If the number of patterns added exceeds the amount supported by packed
/// searchers, then the builder will stop accumulating patterns and render
/// itself inert. At this point, constructing a searcher will always return
/// `None`.
pub fn extend<I, P>(&mut self, patterns: I) -> &mut Builder
where
I: IntoIterator<Item = P>,
P: AsRef<[u8]>,
{
for p in patterns {
self.add(p);
}
self
}
}
impl Default for Builder {
fn default() -> Builder {
Builder::new()
}
}
/// A packed searcher for quickly finding occurrences of multiple patterns.
///
/// If callers need more flexible construction, or if one wants to change the
/// match semantics (either leftmost-first or leftmost-longest), then one can
/// use the [`Config`](struct.Config.html) and/or
/// [`Builder`](struct.Builder.html) types for more fine grained control.
///
/// # Example
///
/// This example shows how to create a searcher from an iterator of patterns.
/// By default, leftmost-first match semantics are used.
///
/// ```
/// use aho_corasick::packed::{MatchKind, Searcher};
///
/// # fn example() -> Option<()> {
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
/// let matches: Vec<usize> = searcher
/// .find_iter("foobar")
/// .map(|mat| mat.pattern())
/// .collect();
/// assert_eq!(vec![0], matches);
/// # Some(()) }
/// # if cfg!(target_arch = "x86_64") {
/// # example().unwrap()
/// # } else {
/// # assert!(example().is_none());
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct Searcher {
config: Config,
patterns: Patterns,
rabinkarp: RabinKarp,
search_kind: SearchKind,
minimum_len: usize,
}
#[derive(Clone, Debug)]
enum SearchKind {
Teddy(Teddy),
RabinKarp,
}
impl Searcher {
/// A convenience function for constructing a searcher from an iterator
/// of things that can be converted to a `&[u8]`.
///
/// If a searcher could not be constructed (either because of an
/// unsupported CPU or because there are too many patterns), then `None`
/// is returned.
///
/// # Example
///
/// Basic usage:
///
/// ```
/// use aho_corasick::packed::{MatchKind, Searcher};
///
/// # fn example() -> Option<()> {
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
/// let matches: Vec<usize> = searcher
/// .find_iter("foobar")
/// .map(|mat| mat.pattern())
/// .collect();
/// assert_eq!(vec![0], matches);
/// # Some(()) }
/// # if cfg!(target_arch = "x86_64") {
/// # example().unwrap()
/// # } else {
/// # assert!(example().is_none());
/// # }
/// ```
pub fn new<I, P>(patterns: I) -> Option<Searcher>
where
I: IntoIterator<Item = P>,
P: AsRef<[u8]>,
{
Builder::new().extend(patterns).build()
}
/// Return the first occurrence of any of the patterns in this searcher,
/// according to its match semantics, in the given haystack. The `Match`
/// returned will include the identifier of the pattern that matched, which
/// corresponds to the index of the pattern (starting from `0`) in which it
/// was added.
///
/// # Example
///
/// Basic usage:
///
/// ```
/// use aho_corasick::packed::{MatchKind, Searcher};
///
/// # fn example() -> Option<()> {
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
/// let mat = searcher.find("foobar")?;
/// assert_eq!(0, mat.pattern());
/// assert_eq!(0, mat.start());
/// assert_eq!(6, mat.end());
/// # Some(()) }
/// # if cfg!(target_arch = "x86_64") {
/// # example().unwrap()
/// # } else {
/// # assert!(example().is_none());
/// # }
/// ```
pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<Match> {
self.find_at(haystack, 0)
}
/// Return the first occurrence of any of the patterns in this searcher,
/// according to its match semantics, in the given haystack starting from
/// the given position.
///
/// The `Match` returned will include the identifier of the pattern that
/// matched, which corresponds to the index of the pattern (starting from
/// `0`) in which it was added. The offsets in the `Match` will be relative
/// to the start of `haystack` (and not `at`).
///
/// # Example
///
/// Basic usage:
///
/// ```
/// use aho_corasick::packed::{MatchKind, Searcher};
///
/// # fn example() -> Option<()> {
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
/// let mat = searcher.find_at("foofoobar", 3)?;
/// assert_eq!(0, mat.pattern());
/// assert_eq!(3, mat.start());
/// assert_eq!(9, mat.end());
/// # Some(()) }
/// # if cfg!(target_arch = "x86_64") {
/// # example().unwrap()
/// # } else {
/// # assert!(example().is_none());
/// # }
/// ```
pub fn find_at<B: AsRef<[u8]>>(
&self,
haystack: B,
at: usize,
) -> Option<Match> {
let haystack = haystack.as_ref();
match self.search_kind {
SearchKind::Teddy(ref teddy) => {
if haystack[at..].len() < teddy.minimum_len() {
return self.slow_at(haystack, at);
}
teddy.find_at(&self.patterns, haystack, at)
}
SearchKind::RabinKarp => {
self.rabinkarp.find_at(&self.patterns, haystack, at)
}
}
}
/// Return an iterator of non-overlapping occurrences of the patterns in
/// this searcher, according to its match semantics, in the given haystack.
///
/// # Example
///
/// Basic usage:
///
/// ```
/// use aho_corasick::packed::{MatchKind, Searcher};
///
/// # fn example() -> Option<()> {
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
/// let matches: Vec<usize> = searcher
/// .find_iter("foobar fooba foofoo")
/// .map(|mat| mat.pattern())
/// .collect();
/// assert_eq!(vec![0, 1, 1, 1], matches);
/// # Some(()) }
/// # if cfg!(target_arch = "x86_64") {
/// # example().unwrap()
/// # } else {
/// # assert!(example().is_none());
/// # }
/// ```
pub fn find_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>(
&'a self,
haystack: &'b B,
) -> FindIter<'a, 'b> {
FindIter { searcher: self, haystack: haystack.as_ref(), at: 0 }
}
/// Returns the match kind used by this packed searcher.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use aho_corasick::packed::{MatchKind, Searcher};
///
/// # fn example() -> Option<()> {
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
/// // leftmost-first is the default.
/// assert_eq!(&MatchKind::LeftmostFirst, searcher.match_kind());
/// # Some(()) }
/// # if cfg!(target_arch = "x86_64") {
/// # example().unwrap()
/// # } else {
/// # assert!(example().is_none());
/// # }
/// ```
pub fn match_kind(&self) -> &MatchKind {
self.patterns.match_kind()
}
/// Returns the minimum length of a haystack that is required in order for
/// packed searching to be effective.
///
/// In some cases, the underlying packed searcher may not be able to search
/// very short haystacks. When that occurs, the implementation will defer
/// to a slower non-packed searcher (which is still generally faster than
/// Aho-Corasick for a small number of patterns). However, callers may
/// want to avoid ever using the slower variant, which one can do by
/// never passing a haystack shorter than the minimum length returned by
/// this method.
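///
/// # Example
///
/// A sketch of how a caller might skip the slower fallback entirely by
/// checking the haystack length first (purely illustrative usage):
///
/// ```
/// use aho_corasick::packed::Searcher;
///
/// # fn example() -> Option<()> {
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
/// let haystack = "foobar";
/// if haystack.len() >= searcher.minimum_len() {
///     // Guaranteed to use the packed (vectorized) code path.
///     assert!(searcher.find(haystack).is_some());
/// }
/// # Some(()) }
/// # if cfg!(target_arch = "x86_64") {
/// # example().unwrap()
/// # } else {
/// # assert!(example().is_none());
/// # }
/// ```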
pub fn minimum_len(&self) -> usize {
self.minimum_len
}
/// Returns the approximate total amount of heap used by this searcher, in
/// units of bytes.
pub fn heap_bytes(&self) -> usize {
self.patterns.heap_bytes()
+ self.rabinkarp.heap_bytes()
+ self.search_kind.heap_bytes()
}
/// Use a slow (non-packed) searcher.
///
/// This is useful when a packed searcher could be constructed, but could
/// not be used to search a specific haystack. For example, if Teddy was
/// built but the haystack is smaller than ~34 bytes, then Teddy might not
/// be able to run.
fn slow_at(&self, haystack: &[u8], at: usize) -> Option<Match> {
self.rabinkarp.find_at(&self.patterns, haystack, at)
}
}
impl SearchKind {
fn heap_bytes(&self) -> usize {
match *self {
SearchKind::Teddy(ref ted) => ted.heap_bytes(),
SearchKind::RabinKarp => 0,
}
}
}
/// An iterator over non-overlapping matches from a packed searcher.
///
/// The lifetime `'s` refers to the lifetime of the underlying
/// [`Searcher`](struct.Searcher.html), while the lifetime `'h` refers to the
/// lifetime of the haystack being searched.
#[derive(Debug)]
pub struct FindIter<'s, 'h> {
searcher: &'s Searcher,
haystack: &'h [u8],
at: usize,
}
impl<'s, 'h> Iterator for FindIter<'s, 'h> {
type Item = Match;
fn next(&mut self) -> Option<Match> {
if self.at > self.haystack.len() {
return None;
}
match self.searcher.find_at(&self.haystack, self.at) {
None => None,
Some(c) => {
self.at = c.end;
Some(c)
}
}
}
}

View File

@ -0,0 +1,117 @@
/*!
A lower level API for packed multiple substring search, principally for a small
number of patterns.
This sub-module provides vectorized routines for quickly finding matches of a
small number of patterns. In general, users of this crate shouldn't need to
interface with this module directly, as the primary
[`AhoCorasick`](../struct.AhoCorasick.html)
searcher will use these routines automatically as a prefilter when applicable.
However, in some cases, callers may want to bypass the Aho-Corasick machinery
entirely and use this vectorized searcher directly.
# Overview
The primary types in this sub-module are:
* [`Searcher`](struct.Searcher.html) executes the actual search algorithm to
report matches in a haystack.
* [`Builder`](struct.Builder.html) accumulates patterns incrementally and can
construct a `Searcher`.
* [`Config`](struct.Config.html) permits tuning the searcher, and itself will
produce a `Builder` (which can then be used to build a `Searcher`).
Currently, the only tuneable knob is the match semantics, but this may be
expanded in the future.
# Examples
This example shows how to create a searcher from an iterator of patterns.
By default, leftmost-first match semantics are used. (See the top-level
[`MatchKind`](../enum.MatchKind.html) type for more details about match
semantics, which apply similarly to packed substring search.)
```
use aho_corasick::packed::{MatchKind, Searcher};
# fn example() -> Option<()> {
let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
let matches: Vec<usize> = searcher
.find_iter("foobar")
.map(|mat| mat.pattern())
.collect();
assert_eq!(vec![0], matches);
# Some(()) }
# if cfg!(target_arch = "x86_64") {
# example().unwrap()
# } else {
# assert!(example().is_none());
# }
```
This example shows how to use [`Config`](struct.Config.html) to change the
match semantics to leftmost-longest:
```
use aho_corasick::packed::{Config, MatchKind};
# fn example() -> Option<()> {
let searcher = Config::new()
.match_kind(MatchKind::LeftmostLongest)
.builder()
.add("foo")
.add("foobar")
.build()?;
let matches: Vec<usize> = searcher
.find_iter("foobar")
.map(|mat| mat.pattern())
.collect();
assert_eq!(vec![1], matches);
# Some(()) }
# if cfg!(target_arch = "x86_64") {
# example().unwrap()
# } else {
# assert!(example().is_none());
# }
```
# Packed substring searching
Packed substring searching refers to the use of SIMD (Single Instruction,
Multiple Data) to accelerate the detection of matches in a haystack. Unlike
conventional algorithms, such as Aho-Corasick, SIMD algorithms for substring
search tend to do better with a small number of patterns, whereas Aho-Corasick
generally maintains reasonably consistent performance regardless of the number
of patterns you give it. Because of this, the vectorized searcher in this
sub-module cannot be used as a general purpose searcher, since building the
searcher may fail. However, in exchange, when searching for a small number of
patterns, searching can be quite a bit faster than Aho-Corasick (sometimes by
an order of magnitude).
The key takeaway here is that constructing a searcher from a list of patterns
is a fallible operation. While the precise conditions under which building a
searcher can fail are an implementation detail, here are some common reasons
(one way to handle this fallibility is sketched after this list):
* Too many patterns were given. Typically, the limit is on the order of 100 or
so, but this limit may fluctuate based on available CPU features.
* The available packed algorithms require CPU features that aren't available.
For example, currently, this crate only provides packed algorithms for
`x86_64`. Therefore, constructing a packed searcher on any other target
(e.g., ARM) will always fail.
* Zero patterns were given, or one of the patterns given was empty. Packed
searchers require at least one pattern and that all patterns are non-empty.
* Something else about the nature of the patterns (typically based on
heuristics) suggests that a packed searcher would perform very poorly, so
no searcher is built.
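As an illustration, one way for a caller to handle this fallibility is to fall
back to the general [`AhoCorasick`](../struct.AhoCorasick.html) searcher when a
packed searcher cannot be built. This is only a sketch (the helper function
below is hypothetical, not part of this crate), and it pins the fallback to
leftmost-first semantics so that both paths report the same matches:
```
use aho_corasick::{packed, AhoCorasickBuilder, MatchKind};

fn find_first(patterns: &[&str], haystack: &str) -> Option<usize> {
    // Prefer the packed searcher when it can be built...
    if let Some(s) = packed::Searcher::new(patterns.iter().cloned()) {
        return s.find(haystack).map(|m| m.pattern());
    }
    // ...and otherwise fall back to Aho-Corasick with matching semantics.
    let ac = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostFirst)
        .build(patterns);
    ac.find(haystack).map(|m| m.pattern())
}

let patterns = &["foobar", "foo"];
assert_eq!(Some(0), find_first(patterns, "foobar"));
```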
*/
pub use packed::api::{Builder, Config, FindIter, MatchKind, Searcher};
mod api;
mod pattern;
mod rabinkarp;
mod teddy;
#[cfg(test)]
mod tests;
#[cfg(target_arch = "x86_64")]
mod vector;

View File

@ -0,0 +1,318 @@
use std::cmp;
use std::fmt;
use std::mem;
use std::u16;
use std::usize;
use packed::api::MatchKind;
/// The type used for representing a pattern identifier.
///
/// We don't use `usize` here because our packed searchers don't scale to
/// huge numbers of patterns, so we keep things a bit smaller.
pub type PatternID = u16;
/// A non-empty collection of non-empty patterns to search for.
///
/// This collection of patterns is what is passed around to both execute
/// searches and to construct the searchers themselves. Namely, this permits
/// searches to avoid copying all of the patterns, and allows us to keep only
/// one copy throughout all packed searchers.
///
/// Note that this collection is not a set. The same pattern can appear more
/// than once.
#[derive(Clone, Debug)]
pub struct Patterns {
/// The match semantics supported by this collection of patterns.
///
/// The match semantics determines the order of the iterator over patterns.
/// For leftmost-first, patterns are provided in the same order as were
/// provided by the caller. For leftmost-longest, patterns are provided in
/// descending order of length, with ties broken by the order in which they
/// were provided by the caller.
kind: MatchKind,
/// The collection of patterns, indexed by their identifier.
by_id: Vec<Vec<u8>>,
/// The order of patterns defined for iteration, given by pattern
/// identifiers. The order of `by_id` and `order` is always the same for
/// leftmost-first semantics, but may be different for leftmost-longest
/// semantics.
order: Vec<PatternID>,
/// The length of the smallest pattern, in bytes.
minimum_len: usize,
/// The largest pattern identifier. This should always be equivalent to
/// the number of patterns minus one in this collection.
max_pattern_id: PatternID,
/// The total number of pattern bytes across the entire collection. This
/// is used for reporting total heap usage in constant time.
total_pattern_bytes: usize,
}
impl Patterns {
/// Create a new, empty collection of patterns with default (leftmost-first)
/// match semantics.
///
/// Patterns are added with `add`, and the identifier of each pattern is the
/// index at which it was added, starting from `0`.
pub fn new() -> Patterns {
Patterns {
kind: MatchKind::default(),
by_id: vec![],
order: vec![],
minimum_len: usize::MAX,
max_pattern_id: 0,
total_pattern_bytes: 0,
}
}
/// Add a pattern to this collection.
///
/// This panics if the pattern given is empty.
pub fn add(&mut self, bytes: &[u8]) {
assert!(!bytes.is_empty());
assert!(self.by_id.len() <= u16::MAX as usize);
let id = self.by_id.len() as u16;
self.max_pattern_id = id;
self.order.push(id);
self.by_id.push(bytes.to_vec());
self.minimum_len = cmp::min(self.minimum_len, bytes.len());
self.total_pattern_bytes += bytes.len();
}
/// Set the match kind semantics for this collection of patterns.
///
/// If the kind is not set, then the default is leftmost-first.
pub fn set_match_kind(&mut self, kind: MatchKind) {
self.kind = kind;
match kind {
MatchKind::LeftmostFirst => {
self.order.sort();
}
MatchKind::LeftmostLongest => {
let (order, by_id) = (&mut self.order, &mut self.by_id);
order.sort_by(|&id1, &id2| {
by_id[id1 as usize]
.len()
.cmp(&by_id[id2 as usize].len())
.reverse()
});
}
MatchKind::__Nonexhaustive => unreachable!(),
}
}
/// Return the number of patterns in this collection.
///
/// This is guaranteed to be greater than zero.
pub fn len(&self) -> usize {
self.by_id.len()
}
/// Returns true if and only if this collection of patterns is empty.
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Returns the approximate total amount of heap used by these patterns, in
/// units of bytes.
pub fn heap_bytes(&self) -> usize {
self.order.len() * mem::size_of::<PatternID>()
+ self.by_id.len() * mem::size_of::<Vec<u8>>()
+ self.total_pattern_bytes
}
/// Clears all heap memory associated with this collection of patterns and
/// resets all state such that it is a valid empty collection.
pub fn reset(&mut self) {
self.kind = MatchKind::default();
self.by_id.clear();
self.order.clear();
self.minimum_len = usize::MAX;
self.max_pattern_id = 0;
self.total_pattern_bytes = 0;
}
/// Return the maximum pattern identifier in this collection. This can be
/// useful in searchers for ensuring that the collection of patterns they
/// are provided at search time and at build time have the same size.
pub fn max_pattern_id(&self) -> PatternID {
assert_eq!((self.max_pattern_id + 1) as usize, self.len());
self.max_pattern_id
}
/// Returns the length, in bytes, of the smallest pattern.
///
/// This is guaranteed to be at least one.
pub fn minimum_len(&self) -> usize {
self.minimum_len
}
/// Returns the match semantics used by these patterns.
pub fn match_kind(&self) -> &MatchKind {
&self.kind
}
/// Return the pattern with the given identifier. If such a pattern does
/// not exist, then this panics.
pub fn get(&self, id: PatternID) -> Pattern {
Pattern(&self.by_id[id as usize])
}
/// Return the pattern with the given identifier without performing bounds
/// checks.
///
/// # Safety
///
/// Callers must ensure that a pattern with the given identifier exists
/// before using this method.
#[cfg(target_arch = "x86_64")]
pub unsafe fn get_unchecked(&self, id: PatternID) -> Pattern {
Pattern(self.by_id.get_unchecked(id as usize))
}
/// Return an iterator over all the patterns in this collection, in the
/// order in which they should be matched.
///
/// Specifically, in a naive multi-pattern matcher, the following is
/// guaranteed to satisfy the match semantics of this collection of
/// patterns:
///
/// ```ignore
/// for i in 0..haystack.len():
/// for p in patterns.iter():
/// if haystack[i..].starts_with(p.bytes()):
/// return Match(p.id(), i, i + p.bytes().len())
/// ```
///
/// Namely, among the patterns in a collection, if they are matched in
/// the order provided by this iterator, then the result is guaranteed
/// to satisfy the correct match semantics. (Either leftmost-first or
/// leftmost-longest.)
pub fn iter(&self) -> PatternIter {
PatternIter { patterns: self, i: 0 }
}
}
/// An iterator over the patterns in the `Patterns` collection.
///
/// The order of the patterns provided by this iterator is consistent with the
/// match semantics of the originating collection of patterns.
///
/// The lifetime `'p` corresponds to the lifetime of the collection of patterns
/// this is iterating over.
#[derive(Debug)]
pub struct PatternIter<'p> {
patterns: &'p Patterns,
i: usize,
}
impl<'p> Iterator for PatternIter<'p> {
type Item = (PatternID, Pattern<'p>);
fn next(&mut self) -> Option<(PatternID, Pattern<'p>)> {
if self.i >= self.patterns.len() {
return None;
}
let id = self.patterns.order[self.i];
let p = self.patterns.get(id);
self.i += 1;
Some((id, p))
}
}
/// A pattern that is used in packed searching.
#[derive(Clone)]
pub struct Pattern<'a>(&'a [u8]);
impl<'a> fmt::Debug for Pattern<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("Pattern")
.field("lit", &String::from_utf8_lossy(&self.0))
.finish()
}
}
impl<'p> Pattern<'p> {
/// Returns the length of this pattern, in bytes.
pub fn len(&self) -> usize {
self.0.len()
}
/// Returns the bytes of this pattern.
pub fn bytes(&self) -> &[u8] {
&self.0
}
/// Returns the first `len` low nybbles from this pattern. If this pattern
/// is shorter than `len`, then this panics.
#[cfg(target_arch = "x86_64")]
pub fn low_nybbles(&self, len: usize) -> Vec<u8> {
let mut nybs = vec![];
for &b in self.bytes().iter().take(len) {
nybs.push(b & 0xF);
}
nybs
}
/// Returns true if this pattern is a prefix of the given bytes.
#[inline(always)]
pub fn is_prefix(&self, bytes: &[u8]) -> bool {
self.len() <= bytes.len() && self.equals(&bytes[..self.len()])
}
/// Returns true if and only if this pattern equals the given bytes.
#[inline(always)]
pub fn equals(&self, bytes: &[u8]) -> bool {
// Why not just use memcmp for this? Well, memcmp requires calling out
// to libc, and this routine is called in fairly hot code paths. Other
// than just calling out to libc, it also seems to result in worse
// codegen. By rolling our own memcmp in pure Rust, the routine appears
// to be more friendly to the optimizer.
//
// This results in an improvement in just about every benchmark. Some
// smaller than others, but in some cases, up to 30% faster.
if self.len() != bytes.len() {
return false;
}
if self.len() < 8 {
for (&b1, &b2) in self.bytes().iter().zip(bytes) {
if b1 != b2 {
return false;
}
}
return true;
}
// When we have 8 or more bytes to compare, then proceed in chunks of
// 8 at a time using unaligned loads.
let mut p1 = self.bytes().as_ptr();
let mut p2 = bytes.as_ptr();
let p1end = self.bytes()[self.len() - 8..].as_ptr();
let p2end = bytes[bytes.len() - 8..].as_ptr();
// SAFETY: Via the conditional above, we know that both `p1` and `p2`
// have the same length, so `p1 < p1end` implies that `p2 < p2end`.
// Thus, dereferencing both `p1` and `p2` in the loop below is safe.
//
// Moreover, we set `p1end` and `p2end` to be 8 bytes before the actual
// end of `p1` and `p2`. Thus, the final dereference outside of the
// loop is guaranteed to be valid.
//
// Finally, we needn't worry about 64-bit alignment here, since we
// do unaligned loads.
unsafe {
while p1 < p1end {
let v1 = (p1 as *const u64).read_unaligned();
let v2 = (p2 as *const u64).read_unaligned();
if v1 != v2 {
return false;
}
p1 = p1.add(8);
p2 = p2.add(8);
}
let v1 = (p1end as *const u64).read_unaligned();
let v2 = (p2end as *const u64).read_unaligned();
v1 == v2
}
}
}
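// Illustrative sketch (assumed test name, not part of the surrounding code):
// `equals` and `is_prefix` agree with the obvious slice comparisons, including
// the chunked comparison path taken for patterns of 8 or more bytes.
#[cfg(test)]
mod pattern_cmp_sketch {
    use super::Pattern;
    #[test]
    fn equals_and_is_prefix_match_slice_semantics() {
        let hay = b"hello, world! this is a haystack";
        // 16 bytes is long enough to exercise the 8-byte chunked path.
        let pat = Pattern(&hay[..16]);
        assert!(pat.equals(&hay[..16]));
        assert!(!pat.equals(&hay[1..17]));
        assert!(pat.is_prefix(hay));
        assert!(!pat.is_prefix(&hay[..8]));
    }
}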

View File

@ -0,0 +1,185 @@
use std::mem;
use packed::pattern::{PatternID, Patterns};
use Match;
/// The type of the rolling hash used in the Rabin-Karp algorithm.
type Hash = usize;
/// The number of buckets to store our patterns in. We don't want this to be
/// too big in order to avoid wasting memory, but we don't want it to be too
/// small either to avoid spending too much time confirming literals.
///
/// The number of buckets MUST be a power of two. Otherwise, determining the
/// bucket from a hash will slow down the code considerably. Using a power
/// of two means `hash % NUM_BUCKETS` can compile down to a simple `and`
/// instruction.
const NUM_BUCKETS: usize = 64;
/// An implementation of the Rabin-Karp algorithm. The main idea of this
/// algorithm is to maintain a rolling hash as it moves through the input, and
/// then check whether that hash corresponds to the same hash for any of the
/// patterns we're looking for.
///
/// A drawback of naively scaling Rabin-Karp to multiple patterns is that
/// it requires all of the patterns to be the same length, which in turn
/// corresponds to the number of bytes to hash. We adapt this to work for
/// multiple patterns of varying size by fixing the number of bytes to hash
/// to be the length of the smallest pattern. We also split the patterns into
/// several buckets to hopefully make the confirmation step faster.
///
/// Wikipedia has a decent explanation, if a bit heavy on the theory:
/// https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
///
/// But ESMAJ provides something a bit more concrete:
/// http://www-igm.univ-mlv.fr/~lecroq/string/node5.html
#[derive(Clone, Debug)]
pub struct RabinKarp {
/// The order of patterns in each bucket is significant. Namely, they are
/// arranged such that the first one to match is the correct match. This
/// may not necessarily correspond to the order provided by the caller.
/// For example, if leftmost-longest semantics are used, then the patterns
/// are sorted by their length in descending order. If leftmost-first
/// semantics are used, then the patterns are sorted by their pattern ID
/// in ascending order (which corresponds to the caller's order).
buckets: Vec<Vec<(Hash, PatternID)>>,
/// The length of the hashing window. Generally, this corresponds to the
/// length of the smallest pattern.
hash_len: usize,
/// The factor to subtract out of a hash before updating it with a new
/// byte.
hash_2pow: usize,
/// The maximum identifier of a pattern. This is used as a sanity check
/// to ensure that the patterns provided by the caller are the same as
/// the patterns that were used to compile the matcher. This sanity check
/// possibly permits safely eliminating bounds checks regardless of what
/// patterns are provided by the caller.
///
/// (Currently, we don't use this to elide bounds checks since it doesn't
/// result in a measurable performance improvement, but we do use it for
/// better failure modes.)
max_pattern_id: PatternID,
}
impl RabinKarp {
/// Compile a new Rabin-Karp matcher from the patterns given.
///
/// This panics if any of the patterns in the collection are empty, or if
/// the collection is itself empty.
pub fn new(patterns: &Patterns) -> RabinKarp {
assert!(patterns.len() >= 1);
let hash_len = patterns.minimum_len();
assert!(hash_len >= 1);
let mut hash_2pow = 1usize;
for _ in 1..hash_len {
hash_2pow = hash_2pow.wrapping_shl(1);
}
let mut rk = RabinKarp {
buckets: vec![vec![]; NUM_BUCKETS],
hash_len,
hash_2pow,
max_pattern_id: patterns.max_pattern_id(),
};
for (id, pat) in patterns.iter() {
let hash = rk.hash(&pat.bytes()[..rk.hash_len]);
let bucket = hash % NUM_BUCKETS;
rk.buckets[bucket].push((hash, id));
}
rk
}
/// Return the first matching pattern in the given haystack, beginning the
/// search at `at`.
pub fn find_at(
&self,
patterns: &Patterns,
haystack: &[u8],
mut at: usize,
) -> Option<Match> {
assert_eq!(NUM_BUCKETS, self.buckets.len());
assert_eq!(
self.max_pattern_id,
patterns.max_pattern_id(),
"Rabin-Karp must be called with same patterns it was built with",
);
if at + self.hash_len > haystack.len() {
return None;
}
let mut hash = self.hash(&haystack[at..at + self.hash_len]);
loop {
let bucket = &self.buckets[hash % NUM_BUCKETS];
for &(phash, pid) in bucket {
if phash == hash {
if let Some(c) = self.verify(patterns, pid, haystack, at) {
return Some(c);
}
}
}
if at + self.hash_len >= haystack.len() {
return None;
}
hash = self.update_hash(
hash,
haystack[at],
haystack[at + self.hash_len],
);
at += 1;
}
}
/// Returns the approximate total amount of heap used by this searcher, in
/// units of bytes.
pub fn heap_bytes(&self) -> usize {
let num_patterns = self.max_pattern_id as usize + 1;
self.buckets.len() * mem::size_of::<Vec<(Hash, PatternID)>>()
+ num_patterns * mem::size_of::<(Hash, PatternID)>()
}
/// Verify whether the pattern with the given id matches at
/// `haystack[at..]`.
///
/// We tag this function as `cold` because it helps improve codegen.
/// Intuitively, it would seem like inlining it would be better. However,
/// the only time this is called and a match is not found is when there
/// is a hash collision, or when a prefix of a pattern matches but
/// the entire pattern doesn't match. This is hopefully fairly rare, and
/// if it does occur a lot, it's going to be slow no matter what we do.
#[cold]
fn verify(
&self,
patterns: &Patterns,
id: PatternID,
haystack: &[u8],
at: usize,
) -> Option<Match> {
let pat = patterns.get(id);
if pat.is_prefix(&haystack[at..]) {
Some(Match::from_span(id as usize, at, at + pat.len()))
} else {
None
}
}
/// Hash the given bytes.
fn hash(&self, bytes: &[u8]) -> Hash {
assert_eq!(self.hash_len, bytes.len());
let mut hash = 0usize;
for &b in bytes {
hash = hash.wrapping_shl(1).wrapping_add(b as usize);
}
hash
}
/// Update the hash given based on removing `old_byte` at the beginning
/// of some byte string, and appending `new_byte` to the end of that same
/// byte string.
fn update_hash(&self, prev: Hash, old_byte: u8, new_byte: u8) -> Hash {
prev.wrapping_sub((old_byte as usize).wrapping_mul(self.hash_2pow))
.wrapping_shl(1)
.wrapping_add(new_byte as usize)
}
}
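// Illustrative sketch (names assumed, not used by the code above): a
// standalone model of the rolling hash, checking that `update_hash`-style
// updates agree with recomputing the hash of each window from scratch.
#[cfg(test)]
mod rolling_hash_sketch {
    fn hash(bytes: &[u8]) -> usize {
        let mut h = 0usize;
        for &b in bytes {
            h = h.wrapping_shl(1).wrapping_add(b as usize);
        }
        h
    }
    // Mirrors `update_hash`: remove `old` scaled by 2^(len-1), shift, add `new`.
    fn update(prev: usize, hash_2pow: usize, old: u8, new: u8) -> usize {
        prev.wrapping_sub((old as usize).wrapping_mul(hash_2pow))
            .wrapping_shl(1)
            .wrapping_add(new as usize)
    }
    #[test]
    fn rolling_hash_matches_recomputation() {
        let haystack = b"abcdefgh";
        let len = 3;
        let hash_2pow = 1usize << (len - 1);
        let mut h = hash(&haystack[..len]);
        for at in 0..haystack.len() - len {
            h = update(h, hash_2pow, haystack[at], haystack[at + len]);
            assert_eq!(h, hash(&haystack[at + 1..at + 1 + len]));
        }
    }
}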

View File

@ -0,0 +1,386 @@
Teddy is a SIMD accelerated multiple substring matching algorithm. The name
and the core ideas in the algorithm were learned from the [Hyperscan][1_u]
project. The implementation in this repository was mostly motivated for use in
accelerating regex searches by searching for small sets of required literals
extracted from the regex.
# Background
The key idea of Teddy is to do *packed* substring matching. In the literature,
packed substring matching is the idea of examining multiple bytes in a haystack
at a time to detect matches. Implementations of, for example, memchr (which
detects matches of a single byte) have been doing this for years. Only
recently, with the introduction of various SIMD instructions, has this been
extended to substring matching. The PCMPESTRI instruction (and its relatives),
for example, implements substring matching in hardware. It is, however, limited
to substrings of length 16 bytes or fewer, but this restriction is fine in a
regex engine, since we rarely care about the performance difference between
searching for a 16 byte literal and a 16 + N literal; 16 is already long
enough. The key downside of the PCMPESTRI instruction, on current (2016) CPUs
at least, is its latency and throughput. As a result, it is often faster to
do substring search with a Boyer-Moore (or Two-Way) variant and a well placed
memchr to quickly skip through the haystack.
There are fewer results from the literature on packed substring matching,
and even fewer for packed multiple substring matching. Ben-Kiki et al. [2]
describes use of PCMPESTRI for substring matching, but is mostly theoretical
and hand-waves performance. There is other theoretical work done by Bille [3]
as well.
The rest of the work in the field, as far as I'm aware, is by Faro and Kulekci
and is generally focused on multiple pattern search. Their first paper [4a]
introduces the concept of a fingerprint, which is computed for every block of
N bytes in every pattern. The haystack is then scanned N bytes at a time and
a fingerprint is computed in the same way it was computed for blocks in the
patterns. If the fingerprint corresponds to one that was found in a pattern,
then a verification step follows to confirm that one of the substrings with the
corresponding fingerprint actually matches at the current location. Various
implementation tricks are employed to make sure the fingerprint lookup is fast;
typically by truncating the fingerprint. (This may, of course, provoke more
steps in the verification process, so a balance must be struck.)
The main downside of [4a] is that the minimum substring length is 32 bytes,
presumably because of how the algorithm uses certain SIMD instructions. This
essentially makes it useless for general purpose regex matching, where a small
number of short patterns is far more likely.
Faro and Kulekci published another paper [4b] that is conceptually very similar
to [4a]. The key difference is that it uses the CRC32 instruction (introduced
as part of SSE 4.2) to compute fingerprint values. This also enables the
algorithm to work effectively on substrings as short as 7 bytes with 4 byte
windows. 7 bytes is unfortunately still too long. The window could technically
be shrunk to 2 bytes, thereby reducing the minimum length to 3, but the small
window size ends up negating most of the performance benefits, even though such
short patterns are likely the common case in a general purpose regex engine.
Faro and Kulekci also published [4c] that appears to be intended as a
replacement to using PCMPESTRI. In particular, it is specifically motivated by
the high throughput/latency time of PCMPESTRI and therefore chooses other SIMD
instructions that are faster. While this approach works for short substrings,
I personally couldn't see a way to generalize it to multiple substring search.
Faro and Kulekci have another paper [4d] that I haven't been able to read
because it is behind a paywall.
# Teddy
Finally, we get to Teddy. If the above literature review is complete, then it
appears that Teddy is a novel algorithm. More than that, in my experience, it
completely blows away the competition for short substrings, which is exactly
what we want in a general purpose regex engine. Again, the algorithm appears
to be developed by the authors of [Hyperscan][1_u]. Hyperscan was open sourced
late 2015, and no earlier history could be found. Therefore, tracking the exact
provenance of the algorithm with respect to the published literature seems
difficult.
At a high level, Teddy works somewhat similarly to the fingerprint algorithms
published by Faro and Kulekci, but Teddy does it in a way that scales a bit
better. Namely:
1. Teddy's core algorithm scans the haystack in 16 (for SSE, or 32 for AVX)
byte chunks. 16 (or 32) is significant because it corresponds to the number
of bytes in a SIMD vector.
2. Bitwise operations are performed on each chunk to discover if any region of
it matches a set of precomputed fingerprints from the patterns. If there are
matches, then a verification step is performed. In this implementation, our
verification step is naive. This can be improved upon.
The details to make this work are quite clever. First, we must choose how to
pick our fingerprints. In Hyperscan's implementation, I *believe* they use the
last N bytes of each substring, where N must be at least the minimum length of
any substring in the set being searched. In this implementation, we use the
first N bytes of each substring. (The tradeoffs between these choices aren't
yet clear to me.) We then must figure out how to quickly test whether an
occurrence of any fingerprint from the set of patterns appears in a 16 byte
block from the haystack. To keep things simple, let's assume N = 1 and examine
some examples to motivate the approach. Here are our patterns:
```ignore
foo
bar
baz
```
The corresponding fingerprints, for N = 1, are `f`, `b` and `b`. Now let's set
our 16 byte block to:
```ignore
bat cat foo bump
xxxxxxxxxxxxxxxx
```
To cut to the chase, Teddy works by using bitsets. In particular, Teddy creates
a mask that allows us to quickly compute membership of a fingerprint in a 16
byte block that also tells which pattern the fingerprint corresponds to. In
this case, our fingerprint is a single byte, so an appropriate abstraction is
a map from a single byte to a list of patterns that contain that fingerprint:
```ignore
f |--> foo
b |--> bar, baz
```
Now, all we need to do is figure out how to represent this map in vector space
and use normal SIMD operations to perform a lookup. The first simplification
we can make is to represent our patterns as bit fields occupying a single
byte. This is important, because a single SIMD vector can store 16 bytes.
```ignore
f |--> 00000001
b |--> 00000010, 00000100
```
How do we perform lookup though? It turns out that SSSE3 introduced a very cool
instruction called PSHUFB. The instruction takes two SIMD vectors, `A` and `B`,
and returns a third vector `C`. All vectors are treated as 16 8-bit integers.
`C` is formed by `C[i] = A[B[i]]`. (This is a bit of a simplification, but true
for the purposes of this algorithm. For full details, see [Intel's Intrinsics
Guide][5_u].) This essentially lets us use the values in `B` to lookup values
in `A`.
If we could somehow cause `B` to contain our 16 byte block from the haystack,
and if `A` could contain our bitmasks, then we'd end up with something like
this for `A`:
```ignore
     0x00 0x01 ... 0x62     ... 0x66     ... 0xFF
A =  0    0        00000110     00000001     0
```
And if `B` contains our window from our haystack, we could use shuffle to take
the values from `B` and use them to look up our bitsets in `A`. But of course,
we can't do this because `A` in the above example contains 256 bytes, which
is much larger than the size of a SIMD vector.
Nybbles to the rescue! A nybble is 4 bits. Instead of one mask to hold all of
our bitsets, we can use two masks, where one mask corresponds to the lower four
bits of our fingerprint and the other mask corresponds to the upper four bits.
So our map now looks like:
```ignore
'f' & 0xF = 0x6 |--> 00000001
'f' >> 4 = 0x6 |--> 00000111
'b' & 0xF = 0x2 |--> 00000110
'b' >> 4 = 0x6 |--> 00000111
```
Notice that the bitsets for each nybble correspond to the union of all
fingerprints that contain that nybble. For example, both `f` and `b` have the
same upper 4 bits but differ on the lower 4 bits. Putting this together, we
have `A0`, `A1` and `B`, where `A0` is our mask for the lower nybble, `A1` is
our mask for the upper nybble and `B` is our 16 byte block from the haystack:
```ignore
     0x00 0x01 0x02     0x03 ... 0x06     ... 0xF
A0 = 0    0    00000110 0        00000001     0
A1 = 0    0    0        0        00000111     0
B  = b    a    t    _    t    p
B  = 0x62 0x61 0x74 0x20 0x74 0x70
```
But of course, we can't use `B` with `PSHUFB` yet, since its values are 8 bits,
and we need indexes that are at most 4 bits (corresponding to one of 16
values). We can apply the same transformation to split `B` into lower and upper
nybbles as we did `A`. As before, `B0` corresponds to the lower nybbles and
`B1` corresponds to the upper nybbles:
```ignore
     b   a   t   _   c   a   t   _   f   o   o   _   b   u   m   p
B0 = 0x2 0x1 0x4 0x0 0x3 0x1 0x4 0x0 0x6 0xF 0xF 0x0 0x2 0x5 0xD 0x0
B1 = 0x6 0x6 0x7 0x2 0x6 0x6 0x7 0x2 0x6 0x6 0x6 0x2 0x6 0x7 0x6 0x7
```
And now we have a nice correspondence. `B0` can index `A0` and `B1` can index
`A1`. Here's what we get when we apply `C0 = PSHUFB(A0, B0)`:
```ignore
      b        a    ... f        o    ... p
      A0[0x2]  A0[0x1]  A0[0x6]  A0[0xF]  A0[0x0]
C0 =  00000110 0        00000001 0        0
```
And `C1 = PSHUFB(A1, B1)`:
```ignore
      b        a        f        o        p
      A1[0x6]  A1[0x6]  A1[0x6]  A1[0x6]  A1[0x7]
C1 =  00000111 00000111 00000111 00000111 0
```
Notice how neither one of `C0` or `C1` is guaranteed to report fully correct
results all on its own. For example, `C1` claims that `b` is a fingerprint for
the pattern `foo` (since `A1[0x6] = 00000111`), and that `o` is a fingerprint
for all of our patterns. But if we combined `C0` and `C1` with an `AND`
operation:
```ignore
     b        a    ... f        o    ... p
C =  00000110 0        00000001 0        0
```
Then we now have that `C[i]` contains a bitset corresponding to the matching
fingerprints in a haystack's 16 byte block, where `i` is the `ith` byte in that
block.
Once we have that, we can look for the position of the least significant bit
in `C`. (Least significant because we only target `x86_64` here, which is
always little endian. Thus, the least significant bytes correspond to bytes
in our haystack at a lower address.) That position, modulo `8`, gives us
the pattern that the fingerprint matches. That position, integer divided by
`8`, also gives us the byte offset that the fingerprint occurs in inside the
16 byte haystack block. Using those two pieces of information, we can run a
verification procedure that tries to match all substrings containing that
fingerprint at that position in the haystack.
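To make the core step concrete, here is a scalar (non-SIMD) sketch of the
N = 1 classification described above. The function name and signature are
illustrative only; the real implementation performs the same lookups with
PSHUFB over whole vectors rather than byte by byte:
```rust
// Scalar model of one Teddy block for N = 1: `a0` and `a1` are the low and
// high nybble masks and `block` is a chunk of the haystack. Each returned
// (offset, bucket) pair is a candidate that must still be verified.
fn candidates(a0: &[u8; 16], a1: &[u8; 16], block: &[u8]) -> Vec<(usize, u32)> {
    let mut out = vec![];
    for (offset, &b) in block.iter().enumerate() {
        // The scalar equivalent of PSHUFB(A0, B0) & PSHUFB(A1, B1).
        let c = a0[(b & 0xF) as usize] & a1[(b >> 4) as usize];
        let mut bits = c;
        while bits != 0 {
            let bucket = bits.trailing_zeros();
            out.push((offset, bucket));
            bits &= bits - 1; // clear the least significant set bit
        }
    }
    out
}
```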
# Implementation notes
The problem with the algorithm as described above is that it uses a single byte
for a fingerprint. This will work well if the fingerprints are rare in the
haystack (e.g., capital letters or special characters in normal English text),
but if the fingerprints are common, you'll wind up spending too much time in
the verification step, which effectively negates the performance benefits of
scanning 16 bytes at a time. Remember, the key to the performance of this
algorithm is to do as little work as possible per 16 (or 32) bytes.
This algorithm can be extrapolated in a relatively straight-forward way to use
larger fingerprints. That is, instead of a single byte prefix, we might use a
two or three byte prefix. The implementation here implements N = {1, 2, 3}
and always picks the largest N possible. The rationale is that the bigger the
fingerprint, the fewer verification steps we'll do. Of course, if N is too
large, then we'll end up doing too much on each step.
The way to extend it is:
1. Add a mask for each byte in the fingerprint. (Remember that each mask is
composed of two SIMD vectors.) This results in a value of `C` for each byte
in the fingerprint while searching.
2. When testing each 16 (or 32) byte block, each value of `C` must be shifted
so that they are aligned. Once aligned, they should all be `AND`'d together.
This will give you only the bitsets corresponding to the full match of the
fingerprint. To do this, one needs to save the last byte (for N=2) or last
two bytes (for N=3) from the previous iteration, and then line them up with
the first one or two bytes of the next iteration. (A scalar sketch of this
alignment step follows this list.)
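Here is a scalar sketch of that alignment for N = 2 (names illustrative; the
real implementation does the same thing with PALIGNR/VPALIGNR on whole
vectors). A candidate is reported at the offset of the last fingerprint byte,
so the result for the first fingerprint byte is shifted forward by one
position, with the byte shifted in coming from the previous iteration:
```rust
// `c1` holds per-byte bucket bits for the first fingerprint byte, `c2` for the
// second, and `prev_c1_last` is the last byte of `c1` from the previous chunk.
fn combine_n2(c1: &[u8; 16], c2: &[u8; 16], prev_c1_last: u8) -> [u8; 16] {
    let mut c = [0u8; 16];
    for i in 0..16 {
        let first = if i == 0 { prev_c1_last } else { c1[i - 1] };
        c[i] = first & c2[i];
    }
    c
}
```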
## Verification
Verification generally follows the procedure outlined above. The tricky parts
are in the right formulation of operations to get our bits out of our vectors.
We have a limited set of operations available to us on SIMD vectors as 128-bit
or 256-bit numbers, so we wind up needing to rip out 2 (or 4) 64-bit integers
from our vectors, and then run our verification step on each of those. The
verification step looks at the least significant bit set, and from its
position, we can derive the byte offset and bucket. (Again, as described
above.) Once we know the bucket, we do a fairly naive exhaustive search for
every literal in that bucket. (Hyperscan is a bit smarter here and uses a hash
table, but I haven't had time to thoroughly explore that. A few initial
half-hearted attempts resulted in worse performance.)
## AVX
The AVX version of Teddy extrapolates almost perfectly from the SSE version.
The only hiccup is that PALIGNR is used to align chunks in the 16-byte (SSE)
version, and there is no equivalent instruction in AVX. AVX does have VPALIGNR,
but it
only works within 128-bit lanes. So there's a bit of tomfoolery to get around
this by shuffling the vectors before calling VPALIGNR.
The only other aspect to AVX is that since our masks are still fundamentally
16-bytes (0x0-0xF), they are duplicated to 32-bytes, so that they can apply to
32-byte chunks.
## Fat Teddy
In the version of Teddy described above, 8 buckets are used to group patterns
that we want to search for. However, when AVX is available, we can extend the
number of buckets to 16 by permitting each byte in our masks to use 16-bits
instead of 8-bits to represent the buckets it belongs to. (This variant is also
in Hyperscan.) However, what we give up is the ability to scan 32 bytes at a
time, even though we're using AVX. Instead, we have to scan 16 bytes at a time.
What we gain, though, is (hopefully) less work in our verification routine.
If patterns are more spread out across more buckets, then there should be fewer
false positives overall. In general, Fat Teddy permits us to grow our capacity
a bit and search for more literals before Teddy gets overwhelmed.
The tricky part of Fat Teddy is in how we adjust our masks and our verification
procedure. For the masks, we simply represent the first 8 buckets in each of
the low 16 bytes, and then the second 8 buckets in each of the high 16 bytes.
Then, in the search loop, instead of loading 32 bytes from the haystack, we
load the same 16 bytes from the haystack into both the low and high 16 byte
portions of our 256-bit vector. So for example, a mask might look like this:
```ignore
bits:    00100001 00000000 ... 11000000 00000000 00000001 ... 00000000
byte:          31       30           16       15       14            0
offset:        15       14            0       15       14            0
buckets:     8-15     8-15         8-15      0-7      0-7          0-7
```
Where `byte` is the position in the vector (higher numbers corresponding to
more significant bits), `offset` is the corresponding position in the haystack
chunk, and `buckets` corresponds to the bucket assignments for that particular
byte.
In particular, notice that the bucket assignments for offset `0` are spread
out between bytes `0` and `16`. This works well for the chunk-by-chunk search
procedure, but verification really wants to process all bucket assignments for
each offset at once. Otherwise, we might wind up finding a match at offset
`1` in one of the first 8 buckets, when we really should have reported a match
at offset `0` in one of the second 8 buckets. (Because we want the leftmost
match.)
Thus, for verification, we rearrange the above vector such that it is a
sequence of 16-bit integers, where the least significant 16-bit integer
corresponds to all of the bucket assignments for offset `0`. So with the
above vector, the least significant 16-bit integer would be
11000000 00000000
which was taken from bytes `16` and `0`. Then the verification step pretty much
runs as described, except with 16 buckets instead of 8.
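A scalar sketch of that rearrangement (names illustrative): for each offset
`i`, the bucket bits live in byte `i` (buckets 0-7) and byte `i + 16`
(buckets 8-15) of the result vector, and are combined into one 16-bit integer:
```rust
fn rearrange_fat(c: &[u8; 32]) -> [u16; 16] {
    let mut out = [0u16; 16];
    for i in 0..16 {
        // High byte: buckets 8-15 (byte i + 16); low byte: buckets 0-7 (byte i).
        out[i] = ((c[i + 16] as u16) << 8) | (c[i] as u16);
    }
    out
}
```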
# References
- **[1]** [Hyperscan on GitHub](https://github.com/01org/hyperscan),
[webpage](https://01.org/hyperscan)
- **[2a]** Ben-Kiki, O., Bille, P., Breslauer, D., Gasieniec, L., Grossi, R.,
& Weimann, O. (2011).
_Optimal packed string matching_.
In LIPIcs-Leibniz International Proceedings in Informatics (Vol. 13).
Schloss Dagstuhl-Leibniz-Zentrum fuer Informatik.
DOI: 10.4230/LIPIcs.FSTTCS.2011.423.
[PDF](http://drops.dagstuhl.de/opus/volltexte/2011/3355/pdf/37.pdf).
- **[2b]** Ben-Kiki, O., Bille, P., Breslauer, D., Ga̧sieniec, L., Grossi, R.,
& Weimann, O. (2014).
_Towards optimal packed string matching_.
Theoretical Computer Science, 525, 111-129.
DOI: 10.1016/j.tcs.2013.06.013.
[PDF](http://www.cs.haifa.ac.il/~oren/Publications/bpsm.pdf).
- **[3]** Bille, P. (2011).
_Fast searching in packed strings_.
Journal of Discrete Algorithms, 9(1), 49-56.
DOI: 10.1016/j.jda.2010.09.003.
[PDF](http://www.sciencedirect.com/science/article/pii/S1570866710000353).
- **[4a]** Faro, S., & Külekci, M. O. (2012, October).
_Fast multiple string matching using streaming SIMD extensions technology_.
In String Processing and Information Retrieval (pp. 217-228).
Springer Berlin Heidelberg.
DOI: 10.1007/978-3-642-34109-0_23.
[PDF](http://www.dmi.unict.it/~faro/papers/conference/faro32.pdf).
- **[4b]** Faro, S., & Külekci, M. O. (2013, September).
_Towards a Very Fast Multiple String Matching Algorithm for Short Patterns_.
In Stringology (pp. 78-91).
[PDF](http://www.dmi.unict.it/~faro/papers/conference/faro36.pdf).
- **[4c]** Faro, S., & Külekci, M. O. (2013, January).
_Fast packed string matching for short patterns_.
In Proceedings of the Meeting on Algorithm Engineering & Experiments
(pp. 113-121).
Society for Industrial and Applied Mathematics.
[PDF](http://arxiv.org/pdf/1209.6449.pdf).
- **[4d]** Faro, S., & Külekci, M. O. (2014).
_Fast and flexible packed string matching_.
Journal of Discrete Algorithms, 28, 61-72.
DOI: 10.1016/j.jda.2014.07.003.
[1_u]: https://github.com/01org/hyperscan
[5_u]: https://software.intel.com/sites/landingpage/IntrinsicsGuide

View File

@ -0,0 +1,414 @@
// See the README in this directory for an explanation of the Teddy algorithm.
use std::cmp;
use std::collections::BTreeMap;
use std::fmt;
use packed::pattern::{PatternID, Patterns};
use packed::teddy::Teddy;
/// A builder for constructing a Teddy matcher.
///
/// The builder primarily permits fine grained configuration of the Teddy
/// matcher. Most options are made only available for testing/benchmarking
/// purposes. In reality, options are automatically determined by the nature
/// and number of patterns given to the builder.
#[derive(Clone, Debug)]
pub struct Builder {
/// When none, this is automatically determined. Otherwise, `false` means
/// slim Teddy is used (8 buckets) and `true` means fat Teddy is used
/// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't
/// available and Fat Teddy was requested, no matcher will be built.
fat: Option<bool>,
/// When none, this is automatically determined. Otherwise, `false` means
/// that 128-bit vectors will be used (up to SSSE3 instructions) whereas
/// `true` means that 256-bit vectors will be used. As with `fat`, if
/// 256-bit vectors are requested and they aren't available, then a
/// searcher will not be built.
avx: Option<bool>,
}
impl Default for Builder {
fn default() -> Builder {
Builder::new()
}
}
impl Builder {
/// Create a new builder for configuring a Teddy matcher.
pub fn new() -> Builder {
Builder { fat: None, avx: None }
}
/// Build a matcher for the set of patterns given. If a matcher could not
/// be built, then `None` is returned.
///
/// Generally, a matcher isn't built if the necessary CPU features aren't
/// available, if the target is unsupported, or if the searcher is believed
/// to be slower than standard techniques (i.e., if there are too many
/// literals).
pub fn build(&self, patterns: &Patterns) -> Option<Teddy> {
self.build_imp(patterns)
}
/// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses
/// 16 buckets whereas Slim Teddy uses 8 buckets. More buckets are useful
/// for a larger set of literals.
///
/// `None` is the default, which results in an automatic selection based
/// on the number of literals and available CPU features.
pub fn fat(&mut self, yes: Option<bool>) -> &mut Builder {
self.fat = yes;
self
}
/// Request the use of 256-bit vectors (true) or 128-bit vectors (false).
/// Generally, a larger vector size is better since it either permits
/// matching more patterns or matching more bytes in the haystack at once.
///
/// `None` is the default, which results in an automatic selection based on
/// the number of literals and available CPU features.
pub fn avx(&mut self, yes: Option<bool>) -> &mut Builder {
self.avx = yes;
self
}
fn build_imp(&self, patterns: &Patterns) -> Option<Teddy> {
use packed::teddy::runtime;
// Most of the logic here is just about selecting the optimal settings,
// or perhaps even rejecting construction altogether. The choices
// we have are: fat (avx only) or not, ssse3 or avx2, and how many
// patterns we allow ourselves to search. Additionally, for testing
// and benchmarking, we permit callers to try to "force" a setting,
// and if the setting isn't allowed (e.g., forcing AVX when AVX isn't
// available), then we bail and return nothing.
if patterns.len() > 64 {
return None;
}
let has_ssse3 = is_x86_feature_detected!("ssse3");
let has_avx = is_x86_feature_detected!("avx2");
let avx = if self.avx == Some(true) {
if !has_avx {
return None;
}
true
} else if self.avx == Some(false) {
if !has_ssse3 {
return None;
}
false
} else if !has_ssse3 && !has_avx {
return None;
} else {
has_avx
};
let fat = match self.fat {
None => avx && patterns.len() > 32,
Some(false) => false,
Some(true) if !avx => return None,
Some(true) => true,
};
let mut compiler = Compiler::new(patterns, fat);
compiler.compile();
let Compiler { buckets, masks, .. } = compiler;
// SAFETY: It is required that the builder only produce Teddy matchers
// that are allowed to run on the current CPU, since we later assume
// that the presence of (for example) TeddySlim1Mask256 means it is
// safe to call functions marked with the `avx2` target feature.
match (masks.len(), avx, fat) {
(1, false, _) => Some(Teddy {
buckets: buckets,
max_pattern_id: patterns.max_pattern_id(),
exec: runtime::Exec::TeddySlim1Mask128(
runtime::TeddySlim1Mask128 {
mask1: runtime::Mask128::new(masks[0]),
},
),
}),
(1, true, false) => Some(Teddy {
buckets: buckets,
max_pattern_id: patterns.max_pattern_id(),
exec: runtime::Exec::TeddySlim1Mask256(
runtime::TeddySlim1Mask256 {
mask1: runtime::Mask256::new(masks[0]),
},
),
}),
(1, true, true) => Some(Teddy {
buckets: buckets,
max_pattern_id: patterns.max_pattern_id(),
exec: runtime::Exec::TeddyFat1Mask256(
runtime::TeddyFat1Mask256 {
mask1: runtime::Mask256::new(masks[0]),
},
),
}),
(2, false, _) => Some(Teddy {
buckets: buckets,
max_pattern_id: patterns.max_pattern_id(),
exec: runtime::Exec::TeddySlim2Mask128(
runtime::TeddySlim2Mask128 {
mask1: runtime::Mask128::new(masks[0]),
mask2: runtime::Mask128::new(masks[1]),
},
),
}),
(2, true, false) => Some(Teddy {
buckets: buckets,
max_pattern_id: patterns.max_pattern_id(),
exec: runtime::Exec::TeddySlim2Mask256(
runtime::TeddySlim2Mask256 {
mask1: runtime::Mask256::new(masks[0]),
mask2: runtime::Mask256::new(masks[1]),
},
),
}),
(2, true, true) => Some(Teddy {
buckets: buckets,
max_pattern_id: patterns.max_pattern_id(),
exec: runtime::Exec::TeddyFat2Mask256(
runtime::TeddyFat2Mask256 {
mask1: runtime::Mask256::new(masks[0]),
mask2: runtime::Mask256::new(masks[1]),
},
),
}),
(3, false, _) => Some(Teddy {
buckets: buckets,
max_pattern_id: patterns.max_pattern_id(),
exec: runtime::Exec::TeddySlim3Mask128(
runtime::TeddySlim3Mask128 {
mask1: runtime::Mask128::new(masks[0]),
mask2: runtime::Mask128::new(masks[1]),
mask3: runtime::Mask128::new(masks[2]),
},
),
}),
(3, true, false) => Some(Teddy {
buckets: buckets,
max_pattern_id: patterns.max_pattern_id(),
exec: runtime::Exec::TeddySlim3Mask256(
runtime::TeddySlim3Mask256 {
mask1: runtime::Mask256::new(masks[0]),
mask2: runtime::Mask256::new(masks[1]),
mask3: runtime::Mask256::new(masks[2]),
},
),
}),
(3, true, true) => Some(Teddy {
buckets: buckets,
max_pattern_id: patterns.max_pattern_id(),
exec: runtime::Exec::TeddyFat3Mask256(
runtime::TeddyFat3Mask256 {
mask1: runtime::Mask256::new(masks[0]),
mask2: runtime::Mask256::new(masks[1]),
mask3: runtime::Mask256::new(masks[2]),
},
),
}),
_ => unreachable!(),
}
}
}
/// A compiler is in charge of allocating patterns into buckets and generating
/// the masks necessary for searching.
#[derive(Clone)]
struct Compiler<'p> {
patterns: &'p Patterns,
buckets: Vec<Vec<PatternID>>,
masks: Vec<Mask>,
}
impl<'p> Compiler<'p> {
/// Create a new Teddy compiler for the given patterns. If `fat` is true,
/// then 16 buckets will be used instead of 8.
///
/// This panics if any of the patterns given are empty.
fn new(patterns: &'p Patterns, fat: bool) -> Compiler<'p> {
let mask_len = cmp::min(3, patterns.minimum_len());
assert!(1 <= mask_len && mask_len <= 3);
Compiler {
patterns,
buckets: vec![vec![]; if fat { 16 } else { 8 }],
masks: vec![Mask::default(); mask_len],
}
}
/// Compile the patterns in this compiler into buckets and masks.
fn compile(&mut self) {
let mut lonibble_to_bucket: BTreeMap<Vec<u8>, usize> = BTreeMap::new();
for (id, pattern) in self.patterns.iter() {
// We try to be slightly clever in how we assign patterns into
// buckets. Generally speaking, we want patterns with the same
// prefix to be in the same bucket, since it minimizes the amount
// of time we spend churning through buckets in the verification
// step.
//
// So we could assign patterns with the same N-prefix (where N
// is the size of the mask, which is one of {1, 2, 3}) to the
// same bucket. However, case insensitive searches are fairly
// common, so we'd, for example, ideally want to treat `abc` and
// `ABC` as if they shared the same prefix. ASCII has the nice
// property that the lower 4 bits of `A` and `a` are the same, so
// we therefore group patterns with the same low-nybble-N-prefix
// into the same bucket.
//
// MOREOVER, this is actually necessary for correctness! In
// particular, by grouping patterns with the same prefix into the
// same bucket, we ensure that we preserve correct leftmost-first
// and leftmost-longest match semantics. In addition to the fact
// that `patterns.iter()` iterates in the correct order, this
// guarantees that all possible ambiguous matches will occur in
// the same bucket. The verification routine could be adjusted to
// support correct leftmost match semantics regardless of bucket
// allocation, but that results in a performance hit. It's much
// nicer to be able to just stop as soon as a match is found.
let lonybs = pattern.low_nybbles(self.masks.len());
if let Some(&bucket) = lonibble_to_bucket.get(&lonybs) {
self.buckets[bucket].push(id);
} else {
// N.B. We assign buckets in reverse because it shouldn't have
// any influence on performance, but it does make it harder to
// get leftmost match semantics accidentally correct.
let bucket = (self.buckets.len() - 1)
- (id as usize % self.buckets.len());
self.buckets[bucket].push(id);
lonibble_to_bucket.insert(lonybs, bucket);
}
}
for (bucket_index, bucket) in self.buckets.iter().enumerate() {
for &pat_id in bucket {
let pat = self.patterns.get(pat_id);
for (i, mask) in self.masks.iter_mut().enumerate() {
if self.buckets.len() == 8 {
mask.add_slim(bucket_index as u8, pat.bytes()[i]);
} else {
mask.add_fat(bucket_index as u8, pat.bytes()[i]);
}
}
}
}
}
}
impl<'p> fmt::Debug for Compiler<'p> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut buckets = vec![vec![]; self.buckets.len()];
for (i, bucket) in self.buckets.iter().enumerate() {
for &patid in bucket {
buckets[i].push(self.patterns.get(patid));
}
}
f.debug_struct("Compiler")
.field("buckets", &buckets)
.field("masks", &self.masks)
.finish()
}
}
/// Mask represents the low and high nybble masks that will be used during
/// search. Each mask is 32 bytes wide, although only the first 16 bytes are
/// used for the SSSE3 runtime.
///
/// Each byte in the mask corresponds to an 8-bit bitset, where bit `i` is set
/// if and only if the corresponding nybble is in the ith bucket. The index of
/// the byte (0-15, inclusive) corresponds to the nybble.
///
/// Each mask is used as the target of a shuffle, where the indices for the
/// shuffle are taken from the haystack. AND'ing the shuffles for both the
/// low and high masks together also results in 8-bit bitsets, but where bit
/// `i` is set if and only if the corresponding *byte* is in the ith bucket.
///
/// During compilation, masks are just arrays. But during search, these masks
/// are represented as 128-bit or 256-bit vectors.
///
/// (See the README in this directory for more details.)
#[derive(Clone, Copy, Default)]
pub struct Mask {
lo: [u8; 32],
hi: [u8; 32],
}
impl Mask {
/// Update this mask by adding the given byte to the given bucket. The
/// given bucket must be in the range 0-7.
///
/// This is for "slim" Teddy, where there are only 8 buckets.
fn add_slim(&mut self, bucket: u8, byte: u8) {
assert!(bucket < 8);
let byte_lo = (byte & 0xF) as usize;
let byte_hi = ((byte >> 4) & 0xF) as usize;
// When using 256-bit vectors, we need to set this bucket assignment in
// the low and high 128-bit portions of the mask. This allows us to
// process 32 bytes at a time. Namely, AVX2 shuffles operate on each
// of the 128-bit lanes, rather than the full 256-bit vector at once.
self.lo[byte_lo] |= 1 << bucket;
self.lo[byte_lo + 16] |= 1 << bucket;
self.hi[byte_hi] |= 1 << bucket;
self.hi[byte_hi + 16] |= 1 << bucket;
}
/// Update this mask by adding the given byte to the given bucket. The
/// given bucket must be in the range 0-15.
///
/// This is for "fat" Teddy, where there are 16 buckets.
fn add_fat(&mut self, bucket: u8, byte: u8) {
assert!(bucket < 16);
let byte_lo = (byte & 0xF) as usize;
let byte_hi = ((byte >> 4) & 0xF) as usize;
// Unlike slim teddy, fat teddy only works with AVX2. For fat teddy,
// the high 128 bits of our mask correspond to buckets 8-15, while the
// low 128 bits correspond to buckets 0-7.
if bucket < 8 {
self.lo[byte_lo] |= 1 << bucket;
self.hi[byte_hi] |= 1 << bucket;
} else {
self.lo[byte_lo + 16] |= 1 << (bucket % 8);
self.hi[byte_hi + 16] |= 1 << (bucket % 8);
}
}
/// Return the low 128 bits of the low-nybble mask.
pub fn lo128(&self) -> [u8; 16] {
let mut tmp = [0; 16];
tmp.copy_from_slice(&self.lo[..16]);
tmp
}
/// Return the full low-nybble mask.
pub fn lo256(&self) -> [u8; 32] {
self.lo
}
/// Return the low 128 bits of the high-nybble mask.
pub fn hi128(&self) -> [u8; 16] {
let mut tmp = [0; 16];
tmp.copy_from_slice(&self.hi[..16]);
tmp
}
/// Return the full high-nybble mask.
pub fn hi256(&self) -> [u8; 32] {
self.hi
}
}
impl fmt::Debug for Mask {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let (mut parts_lo, mut parts_hi) = (vec![], vec![]);
for i in 0..32 {
parts_lo.push(format!("{:02}: {:08b}", i, self.lo[i]));
parts_hi.push(format!("{:02}: {:08b}", i, self.hi[i]));
}
f.debug_struct("Mask")
.field("lo", &parts_lo)
.field("hi", &parts_hi)
.finish()
}
}

View File

@ -0,0 +1,62 @@
#[cfg(target_arch = "x86_64")]
pub use packed::teddy::compile::Builder;
#[cfg(not(target_arch = "x86_64"))]
pub use packed::teddy::fallback::Builder;
#[cfg(not(target_arch = "x86_64"))]
pub use packed::teddy::fallback::Teddy;
#[cfg(target_arch = "x86_64")]
pub use packed::teddy::runtime::Teddy;
#[cfg(target_arch = "x86_64")]
mod compile;
#[cfg(target_arch = "x86_64")]
mod runtime;
#[cfg(not(target_arch = "x86_64"))]
mod fallback {
use packed::pattern::Patterns;
use Match;
#[derive(Clone, Debug, Default)]
pub struct Builder(());
impl Builder {
pub fn new() -> Builder {
Builder(())
}
pub fn build(&self, _: &Patterns) -> Option<Teddy> {
None
}
pub fn fat(&mut self, _: Option<bool>) -> &mut Builder {
self
}
pub fn avx(&mut self, _: Option<bool>) -> &mut Builder {
self
}
}
#[derive(Clone, Debug)]
pub struct Teddy(());
impl Teddy {
pub fn find_at(
&self,
_: &Patterns,
_: &[u8],
_: usize,
) -> Option<Match> {
None
}
pub fn minimum_len(&self) -> usize {
0
}
pub fn heap_bytes(&self) -> usize {
0
}
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,568 @@
use std::collections::HashMap;
use std::usize;
use packed::{Config, MatchKind};
use Match;
/// A description of a single test against a multi-pattern searcher.
///
/// A single test may not necessarily pass on every configuration of a
/// searcher. The tests are categorized and grouped appropriately below.
#[derive(Clone, Debug, Eq, PartialEq)]
struct SearchTest {
/// The name of this test, for debugging.
name: &'static str,
/// The patterns to search for.
patterns: &'static [&'static str],
/// The text to search.
haystack: &'static str,
/// Each match is a triple of (pattern_index, start, end), where
/// pattern_index is an index into `patterns` and `start`/`end` are indices
/// into `haystack`.
matches: &'static [(usize, usize, usize)],
}
struct SearchTestOwned {
offset: usize,
name: String,
patterns: Vec<String>,
haystack: String,
matches: Vec<(usize, usize, usize)>,
}
impl SearchTest {
fn variations(&self) -> Vec<SearchTestOwned> {
let mut tests = vec![];
for i in 0..=260 {
tests.push(self.offset_prefix(i));
tests.push(self.offset_suffix(i));
tests.push(self.offset_both(i));
}
tests
}
fn offset_both(&self, off: usize) -> SearchTestOwned {
SearchTestOwned {
offset: off,
name: self.name.to_string(),
patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
haystack: format!(
"{}{}{}",
"Z".repeat(off),
self.haystack,
"Z".repeat(off)
),
matches: self
.matches
.iter()
.map(|&(id, s, e)| (id, s + off, e + off))
.collect(),
}
}
fn offset_prefix(&self, off: usize) -> SearchTestOwned {
SearchTestOwned {
offset: off,
name: self.name.to_string(),
patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
haystack: format!("{}{}", "Z".repeat(off), self.haystack),
matches: self
.matches
.iter()
.map(|&(id, s, e)| (id, s + off, e + off))
.collect(),
}
}
fn offset_suffix(&self, off: usize) -> SearchTestOwned {
SearchTestOwned {
offset: off,
name: self.name.to_string(),
patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
haystack: format!("{}{}", self.haystack, "Z".repeat(off)),
matches: self.matches.to_vec(),
}
}
// fn to_owned(&self) -> SearchTestOwned {
// SearchTestOwned {
// name: self.name.to_string(),
// patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
// haystack: self.haystack.to_string(),
// matches: self.matches.iter().cloned().collect(),
// }
// }
}
/// Short-hand constructor for SearchTest. We use it a lot below.
macro_rules! t {
($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
SearchTest {
name: stringify!($name),
patterns: $patterns,
haystack: $haystack,
matches: $matches,
}
};
}
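// Illustrative note, not part of the vendored upstream source: the `t!` macro
// above is pure shorthand. For example, `t!(basic010, &["a"], "a", &[(0, 0, 1)])`
// expands to the struct literal below.
//
//     SearchTest {
//         name: "basic010",
//         patterns: &["a"],
//         haystack: "a",
//         matches: &[(0, 0, 1)],
//     }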
/// A collection of test groups.
type TestCollection = &'static [&'static [SearchTest]];
// Define several collections corresponding to the different type of match
// semantics supported. These collections have some overlap, but each
// collection should have some tests that no other collection has.
/// Tests for leftmost-first match semantics.
const PACKED_LEFTMOST_FIRST: TestCollection =
&[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY];
/// Tests for leftmost-longest match semantics.
const PACKED_LEFTMOST_LONGEST: TestCollection =
&[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY];
// Now define the individual tests that make up the collections above.
/// A collection of tests that should always be true regardless of
/// match semantics. That is, all combinations of leftmost-{first, longest}
/// should produce the same answer.
const BASICS: &'static [SearchTest] = &[
t!(basic001, &["a"], "", &[]),
t!(basic010, &["a"], "a", &[(0, 0, 1)]),
t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
t!(basic060, &["a"], "bbb", &[]),
t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
t!(basic100, &["aa"], "", &[]),
t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
t!(basic130, &["aa"], "abbab", &[]),
t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]),
t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
t!(basic300, &["a", "b"], "", &[]),
t!(basic310, &["a", "b"], "z", &[]),
t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
t!(
basic340,
&["a", "b"],
"abba",
&[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
),
t!(
basic350,
&["b", "a"],
"abba",
&[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
),
t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
t!(basic400, &["foo", "bar"], "", &[]),
t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
t!(
basic720,
&["yabcdef", "bcdeyabc", "abcdezghi"],
"yabcdezghi",
&[(2, 1, 10),]
),
t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
t!(
basic840,
&["ab", "ba"],
"abababa",
&[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
),
t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
];
/// Tests for leftmost match semantics. These should pass for both
/// leftmost-first and leftmost-longest match kinds. Stated differently, among
/// ambiguous matches, the longest match and the match that appeared first when
/// constructing the automaton should always be the same.
const LEFTMOST: &'static [SearchTest] = &[
t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
t!(
leftmost360,
&["abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(2, 0, 8),]
),
t!(
leftmost370,
&["abcdefghi", "cde", "hz", "abcdefgh"],
"abcdefghz",
&[(3, 0, 8),]
),
t!(
leftmost380,
&["abcdefghi", "hz", "abcdefgh", "a"],
"abcdefghz",
&[(2, 0, 8),]
),
t!(
leftmost390,
&["b", "abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(3, 0, 8),]
),
t!(
leftmost400,
&["h", "abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(3, 0, 8),]
),
t!(
leftmost410,
&["z", "abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(3, 0, 8), (0, 8, 9),]
),
];
/// Tests for non-overlapping leftmost-first match semantics. These tests
/// should generally be specific to leftmost-first, which means they should
/// generally fail under leftmost-longest semantics.
const LEFTMOST_FIRST: &'static [SearchTest] = &[
t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
t!(
leftfirst310,
&["abcd", "b", "bce", "ce"],
"abce",
&[(1, 1, 2), (3, 2, 4),]
),
t!(
leftfirst320,
&["a", "abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(0, 0, 1), (2, 7, 9),]
),
t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
t!(
leftfirst340,
&["abcdef", "x", "x", "x", "x", "x", "x", "abcde"],
"abcdef",
&[(0, 0, 6)]
),
];
/// Tests for non-overlapping leftmost-longest match semantics. These tests
/// should generally be specific to leftmost-longest, which means they should
/// generally fail under leftmost-first semantics.
const LEFTMOST_LONGEST: &'static [SearchTest] = &[
t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
t!(
leftlong310,
&["a", "abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(3, 0, 8),]
),
t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
];
/// Regression tests that are applied to all combinations.
///
/// If regression tests are needed for specific match semantics, then add them
/// to the appropriate group above.
const REGRESSION: &'static [SearchTest] = &[
t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
t!(
regression030,
&["libcore/", "libstd/"],
"libcore/char/methods.rs",
&[(0, 0, 8),]
),
t!(
regression040,
&["libstd/", "libcore/"],
"libcore/char/methods.rs",
&[(1, 0, 8),]
),
t!(
regression050,
&["\x00\x00\x01", "\x00\x00\x00"],
"\x00\x00\x00",
&[(1, 0, 3),]
),
t!(
regression060,
&["\x00\x00\x00", "\x00\x00\x01"],
"\x00\x00\x00",
&[(0, 0, 3),]
),
];
const TEDDY: &'static [SearchTest] = &[
t!(
teddy010,
&["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"],
"abcdefghijk",
&[
(0, 0, 1),
(1, 1, 2),
(2, 2, 3),
(3, 3, 4),
(4, 4, 5),
(5, 5, 6),
(6, 6, 7),
(7, 7, 8),
(8, 8, 9),
(9, 9, 10),
(10, 10, 11)
]
),
t!(
teddy020,
&["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"],
"abcdefghijk",
&[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),]
),
t!(
teddy030,
&["abc"],
"abcdefghijklmnopqrstuvwxyzabcdefghijk",
&[(0, 0, 3), (0, 26, 29)]
),
];
// Now define a test for each combination of things above that we want to run.
// Since there are a few different combinations for each collection of tests,
// we define a couple of macros to avoid repetition drudgery. The testconfig
// macro constructs the automaton from a given match kind, and runs the search
// tests one-by-one over the given collection. The `with` parameter allows one
// to configure the config with additional parameters. The testcombo macro
// invokes testconfig in precisely this way: it sets up several tests where
// each one turns a different knob on Config.
macro_rules! testconfig {
($name:ident, $collection:expr, $with:expr) => {
#[test]
fn $name() {
run_search_tests($collection, |test| {
let mut config = Config::new();
$with(&mut config);
config
.builder()
.extend(test.patterns.iter().map(|p| p.as_bytes()))
.build()
.unwrap()
.find_iter(&test.haystack)
.collect()
});
}
};
}
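// Illustrative note, not part of the vendored upstream source: each
// `testconfig!` invocation below generates an ordinary `#[test]` function.
// For instance, the `search_rabinkarp_leftmost_first` invocation expands
// (with its configuration closure inlined) roughly to:
//
//     #[test]
//     fn search_rabinkarp_leftmost_first() {
//         run_search_tests(PACKED_LEFTMOST_FIRST, |test| {
//             let mut config = Config::new();
//             config.force_rabin_karp(true);
//             config
//                 .builder()
//                 .extend(test.patterns.iter().map(|p| p.as_bytes()))
//                 .build()
//                 .unwrap()
//                 .find_iter(&test.haystack)
//                 .collect()
//         });
//     }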
#[cfg(target_arch = "x86_64")]
testconfig!(
search_default_leftmost_first,
PACKED_LEFTMOST_FIRST,
|_: &mut Config| {}
);
#[cfg(target_arch = "x86_64")]
testconfig!(
search_default_leftmost_longest,
PACKED_LEFTMOST_LONGEST,
|c: &mut Config| {
c.match_kind(MatchKind::LeftmostLongest);
}
);
#[cfg(target_arch = "x86_64")]
testconfig!(
search_teddy_leftmost_first,
PACKED_LEFTMOST_FIRST,
|c: &mut Config| {
c.force_teddy(true);
}
);
#[cfg(target_arch = "x86_64")]
testconfig!(
search_teddy_leftmost_longest,
PACKED_LEFTMOST_LONGEST,
|c: &mut Config| {
c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
}
);
#[cfg(target_arch = "x86_64")]
testconfig!(
search_teddy_ssse3_leftmost_first,
PACKED_LEFTMOST_FIRST,
|c: &mut Config| {
c.force_teddy(true);
if is_x86_feature_detected!("ssse3") {
c.force_avx(Some(false));
}
}
);
#[cfg(target_arch = "x86_64")]
testconfig!(
search_teddy_ssse3_leftmost_longest,
PACKED_LEFTMOST_LONGEST,
|c: &mut Config| {
c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
if is_x86_feature_detected!("ssse3") {
c.force_avx(Some(false));
}
}
);
#[cfg(target_arch = "x86_64")]
testconfig!(
search_teddy_avx2_leftmost_first,
PACKED_LEFTMOST_FIRST,
|c: &mut Config| {
c.force_teddy(true);
if is_x86_feature_detected!("avx2") {
c.force_avx(Some(true));
}
}
);
#[cfg(target_arch = "x86_64")]
testconfig!(
search_teddy_avx2_leftmost_longest,
PACKED_LEFTMOST_LONGEST,
|c: &mut Config| {
c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
if is_x86_feature_detected!("avx2") {
c.force_avx(Some(true));
}
}
);
#[cfg(target_arch = "x86_64")]
testconfig!(
search_teddy_fat_leftmost_first,
PACKED_LEFTMOST_FIRST,
|c: &mut Config| {
c.force_teddy(true);
if is_x86_feature_detected!("avx2") {
c.force_teddy_fat(Some(true));
}
}
);
#[cfg(target_arch = "x86_64")]
testconfig!(
search_teddy_fat_leftmost_longest,
PACKED_LEFTMOST_LONGEST,
|c: &mut Config| {
c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
if is_x86_feature_detected!("avx2") {
c.force_teddy_fat(Some(true));
}
}
);
testconfig!(
search_rabinkarp_leftmost_first,
PACKED_LEFTMOST_FIRST,
|c: &mut Config| {
c.force_rabin_karp(true);
}
);
testconfig!(
search_rabinkarp_leftmost_longest,
PACKED_LEFTMOST_LONGEST,
|c: &mut Config| {
c.force_rabin_karp(true).match_kind(MatchKind::LeftmostLongest);
}
);
#[test]
fn search_tests_have_unique_names() {
let assert = |constname, tests: &[SearchTest]| {
let mut seen = HashMap::new(); // map from test name to position
for (i, test) in tests.iter().enumerate() {
if !seen.contains_key(test.name) {
seen.insert(test.name, i);
} else {
let last = seen[test.name];
panic!(
"{} tests have duplicate names at positions {} and {}",
constname, last, i
);
}
}
};
assert("BASICS", BASICS);
assert("LEFTMOST", LEFTMOST);
assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
assert("REGRESSION", REGRESSION);
assert("TEDDY", TEDDY);
}
fn run_search_tests<F: FnMut(&SearchTestOwned) -> Vec<Match>>(
which: TestCollection,
mut f: F,
) {
let get_match_triples =
|matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
matches
.into_iter()
.map(|m| (m.pattern(), m.start(), m.end()))
.collect()
};
for &tests in which {
for spec in tests {
for test in spec.variations() {
assert_eq!(
test.matches,
get_match_triples(f(&test)).as_slice(),
"test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}",
test.name,
test.patterns,
test.haystack,
test.offset,
);
}
}
}
}

View File

@ -0,0 +1,181 @@
// This file contains a set of fairly generic utility functions when working
// with SIMD vectors.
//
// SAFETY: All of the routines below are unsafe to call because they assume
// the necessary CPU target features in order to use particular vendor
// intrinsics. Calling these routines when the underlying CPU does not support
// the appropriate target features is NOT safe. Callers must ensure this
// themselves.
//
// Note that it may not look like this safety invariant is being upheld when
// these routines are called. Namely, the CPU feature check is typically pretty
// far away from when these routines are used. Instead, we rely on the fact
// that certain types serve as a guaranteed receipt that pertinent target
// features are enabled. For example, the only way TeddySlim3Mask256 can be
// constructed is if the AVX2 CPU feature is available. Thus, any code running
// inside of TeddySlim3Mask256 can use any of the functions below without any
// additional checks: its very existence *is* the check.
use std::arch::x86_64::*;
/// Shift `a` to the left by two bytes (removing its two most significant
/// bytes), and concatenate it with the two most significant bytes of `b`.
#[target_feature(enable = "avx2")]
pub unsafe fn alignr256_14(a: __m256i, b: __m256i) -> __m256i {
// Credit goes to jneem for figuring this out:
// https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184
//
// TL;DR avx2's PALIGNR instruction is actually just two 128-bit PALIGNR
// instructions, which is not what we want, so we need to do some extra
// shuffling.
// This permute gives us the low 16 bytes of a concatenated with the high
// 16 bytes of b, in order of most significant to least significant. So
// `v = a[15:0] b[31:16]`.
let v = _mm256_permute2x128_si256(b, a, 0x21);
// This effectively does this (where we deal in terms of byte-indexing
// and byte-shifting, and use inclusive ranges):
//
// ret[15:0] := ((a[15:0] << 16) | v[15:0]) >> 14
// = ((a[15:0] << 16) | b[31:16]) >> 14
// ret[31:16] := ((a[31:16] << 16) | v[31:16]) >> 14
// = ((a[31:16] << 16) | a[15:0]) >> 14
//
// Which therefore results in:
//
// ret[31:0] := a[29:16] a[15:14] a[13:0] b[31:30]
//
// The end result is that we've effectively done this:
//
// (a << 2) | (b >> 30)
//
// When `A` and `B` are strings---where the beginning of the string is in
// the least significant bits---this effectively results in the following
// semantic operation:
//
// (A >> 2) | (B << 30)
//
// The reversal is due to the fact that we are in little-endian.
_mm256_alignr_epi8(a, v, 14)
}
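// Illustrative sketch, not part of the vendored upstream source: the byte-level
// effect of `alignr256_14`, expressed on plain 32-byte arrays. If `b` holds the
// previous 32 haystack bytes and `a` the current 32 bytes (both in memory
// order), the result is the 32-byte window starting two bytes before `a`: the
// last two bytes of `b` followed by the first 30 bytes of `a`. The helper and
// test names are invented for this example.
#[cfg(test)]
mod alignr_scalar_sketch {
    fn alignr_14_scalar(a: &[u8; 32], b: &[u8; 32]) -> [u8; 32] {
        let mut out = [0u8; 32];
        out[0] = b[30];
        out[1] = b[31];
        out[2..].copy_from_slice(&a[..30]);
        out
    }

    #[test]
    fn window_shifts_back_by_two() {
        let mut a = [0u8; 32];
        let mut b = [0u8; 32];
        for i in 0..32 {
            b[i] = i as u8; // haystack bytes 0..32
            a[i] = 32 + i as u8; // haystack bytes 32..64
        }
        let out = alignr_14_scalar(&a, &b);
        // The window now starts at haystack byte 30.
        assert_eq!(out[0], 30);
        assert_eq!(out[1], 31);
        assert_eq!(out[2], 32);
        assert_eq!(out[31], 61);
    }
}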
/// Shift `a` to the left by one byte (removing its most significant byte), and
/// concatenate it with the most significant byte of `b`.
#[target_feature(enable = "avx2")]
pub unsafe fn alignr256_15(a: __m256i, b: __m256i) -> __m256i {
// For explanation, see alignr256_14.
let v = _mm256_permute2x128_si256(b, a, 0x21);
_mm256_alignr_epi8(a, v, 15)
}
/// Unpack the given 128-bit vector into its 64-bit components. The first
/// element of the array returned corresponds to the least significant 64-bit
/// lane in `a`.
#[target_feature(enable = "ssse3")]
pub unsafe fn unpack64x128(a: __m128i) -> [u64; 2] {
[
_mm_cvtsi128_si64(a) as u64,
_mm_cvtsi128_si64(_mm_srli_si128(a, 8)) as u64,
]
}
/// Unpack the given 256-bit vector into its 64-bit components. The first
/// element of the array returned corresponds to the least significant 64-bit
/// lane in `a`.
#[target_feature(enable = "avx2")]
pub unsafe fn unpack64x256(a: __m256i) -> [u64; 4] {
// Using transmute here is precisely equivalent, but actually slower. It's
// not quite clear why.
let lo = _mm256_extracti128_si256(a, 0);
let hi = _mm256_extracti128_si256(a, 1);
[
_mm_cvtsi128_si64(lo) as u64,
_mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64,
_mm_cvtsi128_si64(hi) as u64,
_mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64,
]
}
/// Unpack the low 128-bits of `a` and `b`, and return them as 4 64-bit
/// integers.
///
/// More precisely, if a = a4 a3 a2 a1 and b = b4 b3 b2 b1, where each element
/// is a 64-bit integer and a1/b1 correspond to the least significant 64 bits,
/// then the return value is `b2 b1 a2 a1`.
#[target_feature(enable = "avx2")]
pub unsafe fn unpacklo64x256(a: __m256i, b: __m256i) -> [u64; 4] {
let lo = _mm256_castsi256_si128(a);
let hi = _mm256_castsi256_si128(b);
[
_mm_cvtsi128_si64(lo) as u64,
_mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64,
_mm_cvtsi128_si64(hi) as u64,
_mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64,
]
}
/// Returns true if and only if all bits in the given 128-bit vector are 0.
#[target_feature(enable = "ssse3")]
pub unsafe fn is_all_zeroes128(a: __m128i) -> bool {
let cmp = _mm_cmpeq_epi8(a, zeroes128());
_mm_movemask_epi8(cmp) as u32 == 0xFFFF
}
/// Returns true if and only if all bits in the given 256-bit vector are 0.
#[target_feature(enable = "avx2")]
pub unsafe fn is_all_zeroes256(a: __m256i) -> bool {
let cmp = _mm256_cmpeq_epi8(a, zeroes256());
_mm256_movemask_epi8(cmp) as u32 == 0xFFFFFFFF
}
/// Load a 128-bit vector from slice at the given position. The slice does
/// not need to be aligned.
///
/// Since this code assumes little-endian (there is no big-endian x86), the
/// bytes starting in `slice[at..]` will be at the least significant bits of
/// the returned vector. This is important for the surrounding code, since for
/// example, shifting the resulting vector right is equivalent to logically
/// shifting the bytes in `slice` left.
#[target_feature(enable = "sse2")]
pub unsafe fn loadu128(slice: &[u8], at: usize) -> __m128i {
let ptr = slice.get_unchecked(at..).as_ptr();
_mm_loadu_si128(ptr as *const u8 as *const __m128i)
}
/// Load a 256-bit vector from slice at the given position. The slice does
/// not need to be aligned.
///
/// Since this code assumes little-endian (there is no big-endian x86), the
/// bytes starting in `slice[at..]` will be at the least significant bits of
/// the returned vector. This is important for the surrounding code, since for
/// example, shifting the resulting vector right is equivalent to logically
/// shifting the bytes in `slice` left.
#[target_feature(enable = "avx2")]
pub unsafe fn loadu256(slice: &[u8], at: usize) -> __m256i {
let ptr = slice.get_unchecked(at..).as_ptr();
_mm256_loadu_si256(ptr as *const u8 as *const __m256i)
}
/// Returns a 128-bit vector with all bits set to 0.
#[target_feature(enable = "sse2")]
pub unsafe fn zeroes128() -> __m128i {
_mm_set1_epi8(0)
}
/// Returns a 256-bit vector with all bits set to 0.
#[target_feature(enable = "avx2")]
pub unsafe fn zeroes256() -> __m256i {
_mm256_set1_epi8(0)
}
/// Returns a 128-bit vector with all bits set to 1.
#[target_feature(enable = "sse2")]
pub unsafe fn ones128() -> __m128i {
_mm_set1_epi8(0xFF as u8 as i8)
}
/// Returns a 256-bit vector with all bits set to 1.
#[target_feature(enable = "avx2")]
pub unsafe fn ones256() -> __m256i {
_mm256_set1_epi8(0xFF as u8 as i8)
}

View File

@ -0,0 +1,955 @@
use std::cmp;
use std::fmt;
use std::panic::{RefUnwindSafe, UnwindSafe};
use std::u8;
use memchr::{memchr, memchr2, memchr3};
use ahocorasick::MatchKind;
use packed;
use Match;
/// A candidate is the result of running a prefilter on a haystack at a
/// particular position. The result is either no match, a confirmed match or
/// a possible match.
///
/// When no match is returned, the prefilter is guaranteeing that no possible
/// match can be found in the haystack, and the caller may trust this. That is,
/// all correct prefilters must never report false negatives.
///
/// In some cases, a prefilter can confirm a match very quickly, in which case,
/// the caller may use this to stop what it's doing and report the match. In
/// this case, prefilter implementations must never report a false positive.
/// In other cases, the prefilter can only report a potential match, in which
/// case the callers must attempt to confirm the match. In this case, prefilter
/// implementations are permitted to return false positives.
#[derive(Clone, Debug)]
pub enum Candidate {
None,
Match(Match),
PossibleStartOfMatch(usize),
}
impl Candidate {
/// Convert this candidate into an option. This is useful when callers
/// do not distinguish between true positives and false positives (i.e.,
/// the caller must always confirm the match in order to update some other
/// state).
pub fn into_option(self) -> Option<usize> {
match self {
Candidate::None => None,
Candidate::Match(ref m) => Some(m.start()),
Candidate::PossibleStartOfMatch(start) => Some(start),
}
}
}
/// A prefilter describes the behavior of fast literal scanners for quickly
/// skipping past bytes in the haystack that we know cannot possibly
/// participate in a match.
pub trait Prefilter:
Send + Sync + RefUnwindSafe + UnwindSafe + fmt::Debug
{
/// Returns the next possible match candidate. This may yield false
/// positives, so callers must confirm a match starting at the position
/// returned. This, however, must never produce false negatives. That is,
/// this must, at minimum, return the starting position of the next match
/// in the given haystack after or at the given position.
fn next_candidate(
&self,
state: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Candidate;
/// A method for cloning a prefilter, to work-around the fact that Clone
/// is not object-safe.
fn clone_prefilter(&self) -> Box<dyn Prefilter>;
/// Returns the approximate total amount of heap used by this prefilter, in
/// units of bytes.
fn heap_bytes(&self) -> usize;
/// Returns true if and only if this prefilter may return false
/// positives. Returning `false` means every reported candidate is a
/// confirmed match, which is useful for completely avoiding the
/// automaton when the prefilter can quickly confirm its own matches.
///
/// By default, this returns true, which is conservative; it is always
/// correct to return `true`. Returning `false` here and reporting a false
/// positive will result in incorrect searches.
fn reports_false_positives(&self) -> bool {
true
}
}
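// Illustrative sketch, not part of the vendored upstream source: the smallest
// possible `Prefilter` implementation, one that never skips anything. It
// reports every position as a possible match start, which is always correct
// (no false negatives) but never useful; the real prefilters below try to
// jump ahead instead. The type and test names are invented for this example.
#[cfg(test)]
mod prefilter_trait_sketch {
    use super::{Candidate, Prefilter, PrefilterState};

    #[derive(Clone, Debug)]
    struct NeverSkip;

    impl Prefilter for NeverSkip {
        fn next_candidate(
            &self,
            _state: &mut PrefilterState,
            haystack: &[u8],
            at: usize,
        ) -> Candidate {
            if at < haystack.len() {
                Candidate::PossibleStartOfMatch(at)
            } else {
                Candidate::None
            }
        }

        fn clone_prefilter(&self) -> Box<dyn Prefilter> {
            Box::new(self.clone())
        }

        fn heap_bytes(&self) -> usize {
            0
        }
    }

    #[test]
    fn reports_current_position() {
        let mut state = PrefilterState::new(1);
        let cand = NeverSkip.next_candidate(&mut state, b"abc", 1);
        assert_eq!(cand.into_option(), Some(1));
    }
}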
impl<'a, P: Prefilter + ?Sized> Prefilter for &'a P {
#[inline]
fn next_candidate(
&self,
state: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Candidate {
(**self).next_candidate(state, haystack, at)
}
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
(**self).clone_prefilter()
}
fn heap_bytes(&self) -> usize {
(**self).heap_bytes()
}
fn reports_false_positives(&self) -> bool {
(**self).reports_false_positives()
}
}
/// A convenience object for representing any type that implements Prefilter
/// and is cloneable.
#[derive(Debug)]
pub struct PrefilterObj(Box<dyn Prefilter>);
impl Clone for PrefilterObj {
fn clone(&self) -> Self {
PrefilterObj(self.0.clone_prefilter())
}
}
impl PrefilterObj {
/// Create a new prefilter object.
pub fn new<T: Prefilter + 'static>(t: T) -> PrefilterObj {
PrefilterObj(Box::new(t))
}
/// Return the underlying prefilter trait object.
pub fn as_ref(&self) -> &dyn Prefilter {
&*self.0
}
}
/// PrefilterState tracks state associated with the effectiveness of a
/// prefilter. It is used to track how many bytes, on average, are skipped by
/// the prefilter. If this average dips below a certain threshold over time,
/// then the state renders the prefilter inert and stops using it.
///
/// A prefilter state should be created for each search. (Where creating an
/// iterator via, e.g., `find_iter`, is treated as a single search.)
#[derive(Clone, Debug)]
pub struct PrefilterState {
/// The number of skips that has been executed.
skips: usize,
/// The total number of bytes that have been skipped.
skipped: usize,
/// The maximum length of a match. This is used to help determine how many
/// bytes on average should be skipped in order for a prefilter to be
/// effective.
max_match_len: usize,
/// Once this heuristic has been deemed permanently ineffective, it will be
/// inert throughout the rest of its lifetime. This serves as a cheap way
/// to check inertness.
inert: bool,
/// The last (absolute) position at which a prefilter scanned to.
/// Prefilters can use this position to determine whether to re-scan or
/// not.
///
/// Unlike other things that impact effectiveness, this is a fleeting
/// condition. That is, a prefilter can be considered ineffective if it is
/// at a position before `last_scan_at`, but can become effective again
/// once the search moves past `last_scan_at`.
///
/// The utility of this is to both avoid additional overhead from calling
/// the prefilter and to avoid quadratic behavior. This ensures that a
/// prefilter will scan any particular byte at most once. (Note that some
/// prefilters, like the start-byte prefilter, do not need to use this
/// field at all, since it only looks for starting bytes.)
last_scan_at: usize,
}
impl PrefilterState {
/// The minimum number of skip attempts to try before considering whether
/// a prefilter is effective or not.
const MIN_SKIPS: usize = 40;
/// The minimum number of bytes that skips must average, expressed as a
/// multiple of the maximum possible match length.
///
/// That is, after MIN_SKIPS have occurred, if the average number of bytes
/// skipped ever falls below MIN_AVG_FACTOR * max-match-length, then the
/// prefilter is rendered inert.
const MIN_AVG_FACTOR: usize = 2;
/// Create a fresh prefilter state.
pub fn new(max_match_len: usize) -> PrefilterState {
PrefilterState {
skips: 0,
skipped: 0,
max_match_len,
inert: false,
last_scan_at: 0,
}
}
/// Update this state with the number of bytes skipped on the last
/// invocation of the prefilter.
#[inline]
fn update_skipped_bytes(&mut self, skipped: usize) {
self.skips += 1;
self.skipped += skipped;
}
/// Updates the position at which the last scan stopped. This may be
/// greater than the position of the last candidate reported. For example,
/// searching for the "rare" byte `z` in `abczdef` for the pattern `abcz`
/// will report a candidate at position `0`, but the end of its last scan
/// will be at position `3`.
///
/// This position factors into the effectiveness of this prefilter. If the
/// current position is less than the last position at which a scan ended,
/// then the prefilter should not be re-run until the search moves past
/// that position.
#[inline]
fn update_at(&mut self, at: usize) {
if at > self.last_scan_at {
self.last_scan_at = at;
}
}
/// Return true if and only if this state indicates that a prefilter is
/// still effective.
///
/// The given pos should correspond to the current starting position of the
/// search.
#[inline]
pub fn is_effective(&mut self, at: usize) -> bool {
if self.inert {
return false;
}
if at < self.last_scan_at {
return false;
}
if self.skips < PrefilterState::MIN_SKIPS {
return true;
}
let min_avg = PrefilterState::MIN_AVG_FACTOR * self.max_match_len;
if self.skipped >= min_avg * self.skips {
return true;
}
// We're inert.
self.inert = true;
false
}
}
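// Illustrative sketch, not part of the vendored upstream source: a concrete
// reading of the heuristic above. With `max_match_len = 8`, the required
// average skip is MIN_AVG_FACTOR * 8 = 16 bytes. Before MIN_SKIPS (40)
// attempts the prefilter is always considered effective; afterwards it stays
// effective only while `skipped / skips` is at least 16. The module and test
// names are invented for this example.
#[cfg(test)]
mod prefilter_state_sketch {
    use super::PrefilterState;

    #[test]
    fn becomes_inert_when_average_skip_is_small() {
        let mut state = PrefilterState::new(8);
        // 40 skips of 1 byte each: the average is far below 16, so once the
        // minimum number of attempts is reached the state goes inert.
        for _ in 0..40 {
            state.update_skipped_bytes(1);
        }
        assert!(!state.is_effective(0));
        // Once inert, it stays inert regardless of position.
        assert!(!state.is_effective(1_000));
    }
}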
/// A builder for constructing the best possible prefilter. When constructed,
/// this builder will heuristically select the best prefilter it can build,
/// if any, and discard the rest.
#[derive(Debug)]
pub struct Builder {
count: usize,
ascii_case_insensitive: bool,
start_bytes: StartBytesBuilder,
rare_bytes: RareBytesBuilder,
packed: Option<packed::Builder>,
}
impl Builder {
/// Create a new builder for constructing the best possible prefilter.
pub fn new(kind: MatchKind) -> Builder {
let pbuilder = kind
.as_packed()
.map(|kind| packed::Config::new().match_kind(kind).builder());
Builder {
count: 0,
ascii_case_insensitive: false,
start_bytes: StartBytesBuilder::new(),
rare_bytes: RareBytesBuilder::new(),
packed: pbuilder,
}
}
/// Enable ASCII case insensitivity. When set, byte strings added to this
/// builder will be interpreted without respect to ASCII case.
pub fn ascii_case_insensitive(mut self, yes: bool) -> Builder {
self.ascii_case_insensitive = yes;
self.start_bytes = self.start_bytes.ascii_case_insensitive(yes);
self.rare_bytes = self.rare_bytes.ascii_case_insensitive(yes);
self
}
/// Return a prefilter suitable for quickly finding potential matches.
///
/// All patterns added to an Aho-Corasick automaton should be added to this
/// builder before attempting to construct the prefilter.
pub fn build(&self) -> Option<PrefilterObj> {
match (self.start_bytes.build(), self.rare_bytes.build()) {
// If we could build both start and rare prefilters, then there are
// a few cases in which we'd want to use the start-byte prefilter
// over the rare-byte prefilter, since the former has lower
// overhead.
(prestart @ Some(_), prerare @ Some(_)) => {
// If the start-byte prefilter can scan for a smaller number
// of bytes than the rare-byte prefilter, then it's probably
// faster.
let has_fewer_bytes =
self.start_bytes.count < self.rare_bytes.count;
// Otherwise, if the combined frequency rank of the detected
// bytes in the start-byte prefilter is "close" to the combined
// frequency rank of the rare-byte prefilter, then we pick
// the start-byte prefilter even if the rare-byte prefilter
// heuristically searches for rare bytes. This is because the
// rare-byte prefilter has higher constant costs, so we tend to
// prefer the start-byte prefilter when we can.
let has_rarer_bytes =
self.start_bytes.rank_sum <= self.rare_bytes.rank_sum + 50;
if has_fewer_bytes || has_rarer_bytes {
prestart
} else {
prerare
}
}
(prestart @ Some(_), None) => prestart,
(None, prerare @ Some(_)) => prerare,
(None, None) if self.ascii_case_insensitive => None,
(None, None) => self
.packed
.as_ref()
.and_then(|b| b.build())
.map(|s| PrefilterObj::new(Packed(s))),
}
}
/// Add a literal string to this prefilter builder.
pub fn add(&mut self, bytes: &[u8]) {
self.count += 1;
self.start_bytes.add(bytes);
self.rare_bytes.add(bytes);
if let Some(ref mut pbuilder) = self.packed {
pbuilder.add(bytes);
}
}
}
/// A type that wraps a packed searcher and implements the `Prefilter`
/// interface.
#[derive(Clone, Debug)]
struct Packed(packed::Searcher);
impl Prefilter for Packed {
fn next_candidate(
&self,
_state: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Candidate {
self.0.find_at(haystack, at).map_or(Candidate::None, Candidate::Match)
}
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
Box::new(self.clone())
}
fn heap_bytes(&self) -> usize {
self.0.heap_bytes()
}
fn reports_false_positives(&self) -> bool {
false
}
}
/// A builder for constructing a rare byte prefilter.
///
/// A rare byte prefilter attempts to pick out a small set of rare bytes that
/// occur in the patterns, and then quickly scan for occurrences of those
/// rare bytes.
#[derive(Clone, Debug)]
struct RareBytesBuilder {
/// Whether this prefilter should account for ASCII case insensitivity or
/// not.
ascii_case_insensitive: bool,
/// A set of byte offsets associated with detected rare bytes. An entry is
/// only set if a rare byte is detected in a pattern.
byte_offsets: RareByteOffsets,
/// Whether this is available as a prefilter or not. This can be set to
/// false during construction if a condition is seen that invalidates the
/// use of the rare-byte prefilter.
available: bool,
/// The number of bytes set to an active value in `byte_offsets`.
count: usize,
/// The sum of frequency ranks for the rare bytes detected. This is
/// intended to give a heuristic notion of how rare the bytes are.
rank_sum: u16,
}
/// A set of rare byte offsets, keyed by byte.
#[derive(Clone, Copy)]
struct RareByteOffsets {
/// When an item in this set has an offset of u8::MAX (255), then it is
/// considered unset.
set: [RareByteOffset; 256],
}
impl RareByteOffsets {
/// Create a new empty set of rare byte offsets.
pub fn empty() -> RareByteOffsets {
RareByteOffsets { set: [RareByteOffset::default(); 256] }
}
/// Add the given offset for the given byte to this set. If the offset is
/// greater than the existing offset, then it overwrites the previous
/// value and returns false. If there is no previous value set, then this
/// sets it and returns true.
///
/// The given offset must be active, otherwise this panics.
pub fn apply(&mut self, byte: u8, off: RareByteOffset) -> bool {
assert!(off.is_active());
let existing = &mut self.set[byte as usize];
if !existing.is_active() {
*existing = off;
true
} else {
if existing.max < off.max {
*existing = off;
}
false
}
}
}
impl fmt::Debug for RareByteOffsets {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut offsets = vec![];
for off in self.set.iter() {
if off.is_active() {
offsets.push(off);
}
}
f.debug_struct("RareByteOffsets").field("set", &offsets).finish()
}
}
/// Offsets associated with an occurrence of a "rare" byte in any of the
/// patterns used to construct a single Aho-Corasick automaton.
#[derive(Clone, Copy, Debug)]
struct RareByteOffset {
/// The maximum offset at which a particular byte occurs from the start
/// of any pattern. This is used as a shift amount. That is, when an
/// occurrence of this byte is found, the candidate position reported by
/// the prefilter is `position_of_byte - max`, such that the automaton
/// will begin its search at a position that is guaranteed to observe a
/// match.
///
/// To avoid accidentally quadratic behavior, a prefilter is considered
/// ineffective when it is asked to start scanning from a position that it
/// has already scanned past.
///
/// N.B. The maximum value for this is 254. A value of 255 indicates that
/// this is unused. If a rare byte is found at an offset of 255 or greater,
/// then the rare-byte prefilter is disabled for simplicity.
max: u8,
}
impl Default for RareByteOffset {
fn default() -> RareByteOffset {
RareByteOffset { max: u8::MAX }
}
}
impl RareByteOffset {
/// Create a new rare byte offset. If the given offset is too big, then
/// an inactive `RareByteOffset` is returned.
fn new(max: usize) -> RareByteOffset {
if max > (u8::MAX - 1) as usize {
RareByteOffset::default()
} else {
RareByteOffset { max: max as u8 }
}
}
/// Returns true if and only if this offset is active. If it's inactive,
/// then it should not be used.
fn is_active(&self) -> bool {
self.max < u8::MAX
}
}
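// Illustrative sketch, not part of the vendored upstream source: the shift
// arithmetic described above. If a rare byte occurs at offset 3 of its pattern
// and the prefilter finds that byte at haystack position 7, the reported
// candidate start is 7 - 3 = 4, i.e. the earliest position where the pattern
// could begin. The module and test names are invented for this example.
#[cfg(test)]
mod rare_byte_offset_sketch {
    use super::RareByteOffset;

    #[test]
    fn candidate_is_shifted_back_by_max_offset() {
        let off = RareByteOffset::new(3);
        assert!(off.is_active());
        let found_at = 7usize;
        let candidate = found_at.saturating_sub(off.max as usize);
        assert_eq!(candidate, 4);
    }

    #[test]
    fn offsets_of_255_or_more_are_inactive() {
        assert!(!RareByteOffset::new(255).is_active());
    }
}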
impl RareBytesBuilder {
/// Create a new builder for constructing a rare byte prefilter.
fn new() -> RareBytesBuilder {
RareBytesBuilder {
ascii_case_insensitive: false,
byte_offsets: RareByteOffsets::empty(),
available: true,
count: 0,
rank_sum: 0,
}
}
/// Enable ASCII case insensitivity. When set, byte strings added to this
/// builder will be interpreted without respect to ASCII case.
fn ascii_case_insensitive(mut self, yes: bool) -> RareBytesBuilder {
self.ascii_case_insensitive = yes;
self
}
/// Build the rare bytes prefilter.
///
/// If there are more than 3 distinct starting bytes, or if heuristics
/// otherwise determine that this prefilter should not be used, then `None`
/// is returned.
fn build(&self) -> Option<PrefilterObj> {
if !self.available || self.count > 3 {
return None;
}
let (mut bytes, mut len) = ([0; 3], 0);
for b in 0..256 {
if self.byte_offsets.set[b].is_active() {
bytes[len] = b as u8;
len += 1;
}
}
match len {
0 => None,
1 => Some(PrefilterObj::new(RareBytesOne {
byte1: bytes[0],
offset: self.byte_offsets.set[bytes[0] as usize],
})),
2 => Some(PrefilterObj::new(RareBytesTwo {
offsets: self.byte_offsets,
byte1: bytes[0],
byte2: bytes[1],
})),
3 => Some(PrefilterObj::new(RareBytesThree {
offsets: self.byte_offsets,
byte1: bytes[0],
byte2: bytes[1],
byte3: bytes[2],
})),
_ => unreachable!(),
}
}
/// Add a byte string to this builder.
///
/// All patterns added to an Aho-Corasick automaton should be added to this
/// builder before attempting to construct the prefilter.
fn add(&mut self, bytes: &[u8]) {
// If we've already blown our budget, then don't waste time looking
// for more rare bytes.
if self.count > 3 {
self.available = false;
return;
}
let mut rarest = match bytes.get(0) {
None => return,
Some(&b) => (b, 0, freq_rank(b)),
};
// The idea here is to look for the rarest byte in each pattern, and
// add that to our set. As a special exception, if we see a byte that
// we've already added, then we immediately stop and choose that byte,
// even if there's another rare byte in the pattern. This helps us
// apply the rare byte optimization in more cases by attempting to pick
// bytes that are in common between patterns. So for example, if we
// were searching for `Sherlock` and `lockjaw`, then this would pick
// `k` for both patterns, resulting in the use of `memchr` instead of
// `memchr2` for `k` and `j`.
for (pos, &b) in bytes.iter().enumerate() {
if self.byte_offsets.set[b as usize].is_active() {
self.add_rare_byte(b, pos);
return;
}
let rank = freq_rank(b);
if rank < rarest.2 {
rarest = (b, pos, rank);
}
}
self.add_rare_byte(rarest.0, rarest.1);
}
fn add_rare_byte(&mut self, byte: u8, pos: usize) {
self.add_one_byte(byte, pos);
if self.ascii_case_insensitive {
self.add_one_byte(opposite_ascii_case(byte), pos);
}
}
fn add_one_byte(&mut self, byte: u8, pos: usize) {
let off = RareByteOffset::new(pos);
if !off.is_active() {
self.available = false;
return;
}
if self.byte_offsets.apply(byte, off) {
self.count += 1;
self.rank_sum += freq_rank(byte) as u16;
}
}
}
/// A prefilter for scanning for a single "rare" byte.
#[derive(Clone, Debug)]
struct RareBytesOne {
byte1: u8,
offset: RareByteOffset,
}
impl Prefilter for RareBytesOne {
fn next_candidate(
&self,
state: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Candidate {
memchr(self.byte1, &haystack[at..])
.map(|i| {
let pos = at + i;
state.last_scan_at = pos;
cmp::max(at, pos.saturating_sub(self.offset.max as usize))
})
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
}
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
Box::new(self.clone())
}
fn heap_bytes(&self) -> usize {
0
}
}
/// A prefilter for scanning for two "rare" bytes.
#[derive(Clone, Debug)]
struct RareBytesTwo {
offsets: RareByteOffsets,
byte1: u8,
byte2: u8,
}
impl Prefilter for RareBytesTwo {
fn next_candidate(
&self,
state: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Candidate {
memchr2(self.byte1, self.byte2, &haystack[at..])
.map(|i| {
let pos = at + i;
state.update_at(pos);
let offset = self.offsets.set[haystack[pos] as usize].max;
cmp::max(at, pos.saturating_sub(offset as usize))
})
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
}
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
Box::new(self.clone())
}
fn heap_bytes(&self) -> usize {
0
}
}
/// A prefilter for scanning for three "rare" bytes.
#[derive(Clone, Debug)]
struct RareBytesThree {
offsets: RareByteOffsets,
byte1: u8,
byte2: u8,
byte3: u8,
}
impl Prefilter for RareBytesThree {
fn next_candidate(
&self,
state: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Candidate {
memchr3(self.byte1, self.byte2, self.byte3, &haystack[at..])
.map(|i| {
let pos = at + i;
state.update_at(pos);
let offset = self.offsets.set[haystack[pos] as usize].max;
cmp::max(at, pos.saturating_sub(offset as usize))
})
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
}
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
Box::new(self.clone())
}
fn heap_bytes(&self) -> usize {
0
}
}
/// A builder for constructing a starting byte prefilter.
///
/// A starting byte prefilter is a simplistic prefilter that looks for possible
/// matches by reporting all positions corresponding to a particular byte. This
/// generally only takes effect when there are at most 3 distinct possible
/// starting bytes. e.g., the patterns `foo`, `bar`, and `baz` have two
/// distinct starting bytes (`f` and `b`), and this prefilter returns all
/// occurrences of either `f` or `b`.
///
/// In some cases, a heuristic frequency analysis may determine that it would
/// be better not to use this prefilter even when there are 3 or fewer distinct
/// starting bytes.
#[derive(Clone, Debug)]
struct StartBytesBuilder {
/// Whether this prefilter should account for ASCII case insensitivity or
/// not.
ascii_case_insensitive: bool,
/// The set of starting bytes observed.
byteset: Vec<bool>,
/// The number of bytes set to true in `byteset`.
count: usize,
/// The sum of frequency ranks for the starting bytes detected. This is
/// intended to give a heuristic notion of how rare the bytes are.
rank_sum: u16,
}
impl StartBytesBuilder {
/// Create a new builder for constructing a start byte prefilter.
fn new() -> StartBytesBuilder {
StartBytesBuilder {
ascii_case_insensitive: false,
byteset: vec![false; 256],
count: 0,
rank_sum: 0,
}
}
/// Enable ASCII case insensitivity. When set, byte strings added to this
/// builder will be interpreted without respect to ASCII case.
fn ascii_case_insensitive(mut self, yes: bool) -> StartBytesBuilder {
self.ascii_case_insensitive = yes;
self
}
/// Build the starting bytes prefilter.
///
/// If there are more than 3 distinct starting bytes, or if heuristics
/// otherwise determine that this prefilter should not be used, then `None`
/// is returned.
fn build(&self) -> Option<PrefilterObj> {
if self.count > 3 {
return None;
}
let (mut bytes, mut len) = ([0; 3], 0);
for b in 0..256 {
if !self.byteset[b] {
continue;
}
// We don't handle non-ASCII bytes for now. Getting non-ASCII
// bytes right is trickier, since we generally don't want to put
// a non-ASCII leading UTF-8 code unit into a prefilter, since
// such bytes tend to occur frequently. Instead, it would be better to use a
// continuation byte, but this requires more sophisticated analysis
// of the automaton and a richer prefilter API.
if b > 0x7F {
return None;
}
bytes[len] = b as u8;
len += 1;
}
match len {
0 => None,
1 => Some(PrefilterObj::new(StartBytesOne { byte1: bytes[0] })),
2 => Some(PrefilterObj::new(StartBytesTwo {
byte1: bytes[0],
byte2: bytes[1],
})),
3 => Some(PrefilterObj::new(StartBytesThree {
byte1: bytes[0],
byte2: bytes[1],
byte3: bytes[2],
})),
_ => unreachable!(),
}
}
/// Add a byte string to this builder.
///
/// All patterns added to an Aho-Corasick automaton should be added to this
/// builder before attempting to construct the prefilter.
fn add(&mut self, bytes: &[u8]) {
if self.count > 3 {
return;
}
if let Some(&byte) = bytes.get(0) {
self.add_one_byte(byte);
if self.ascii_case_insensitive {
self.add_one_byte(opposite_ascii_case(byte));
}
}
}
fn add_one_byte(&mut self, byte: u8) {
if !self.byteset[byte as usize] {
self.byteset[byte as usize] = true;
self.count += 1;
self.rank_sum += freq_rank(byte) as u16;
}
}
}
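// Illustrative sketch, not part of the vendored upstream source: the doc
// example above, exercised directly. The patterns "foo", "bar" and "baz"
// contribute two distinct starting bytes ('f' and 'b'), which is within the
// prefilter's budget of three distinct bytes. The module and test names are
// invented for this example.
#[cfg(test)]
mod start_bytes_sketch {
    use super::StartBytesBuilder;

    #[test]
    fn foo_bar_baz_have_two_start_bytes() {
        let mut b = StartBytesBuilder::new();
        b.add(b"foo");
        b.add(b"bar");
        b.add(b"baz");
        // 'b' is only counted once even though two patterns start with it.
        assert_eq!(b.count, 2);
        assert!(b.build().is_some());
    }
}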
/// A prefilter for scanning for a single starting byte.
#[derive(Clone, Debug)]
struct StartBytesOne {
byte1: u8,
}
impl Prefilter for StartBytesOne {
fn next_candidate(
&self,
_state: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Candidate {
memchr(self.byte1, &haystack[at..])
.map(|i| at + i)
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
}
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
Box::new(self.clone())
}
fn heap_bytes(&self) -> usize {
0
}
}
/// A prefilter for scanning for two starting bytes.
#[derive(Clone, Debug)]
struct StartBytesTwo {
byte1: u8,
byte2: u8,
}
impl Prefilter for StartBytesTwo {
fn next_candidate(
&self,
_state: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Candidate {
memchr2(self.byte1, self.byte2, &haystack[at..])
.map(|i| at + i)
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
}
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
Box::new(self.clone())
}
fn heap_bytes(&self) -> usize {
0
}
}
/// A prefilter for scanning for three starting bytes.
#[derive(Clone, Debug)]
struct StartBytesThree {
byte1: u8,
byte2: u8,
byte3: u8,
}
impl Prefilter for StartBytesThree {
fn next_candidate(
&self,
_state: &mut PrefilterState,
haystack: &[u8],
at: usize,
) -> Candidate {
memchr3(self.byte1, self.byte2, self.byte3, &haystack[at..])
.map(|i| at + i)
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
}
fn clone_prefilter(&self) -> Box<dyn Prefilter> {
Box::new(self.clone())
}
fn heap_bytes(&self) -> usize {
0
}
}
/// Return the next candidate reported by the given prefilter while
/// simultaneously updating the given prestate.
///
/// The caller is responsible for checking the prestate before deciding whether
/// to initiate a search.
#[inline]
pub fn next<P: Prefilter>(
prestate: &mut PrefilterState,
prefilter: P,
haystack: &[u8],
at: usize,
) -> Candidate {
let cand = prefilter.next_candidate(prestate, haystack, at);
match cand {
Candidate::None => {
prestate.update_skipped_bytes(haystack.len() - at);
}
Candidate::Match(ref m) => {
prestate.update_skipped_bytes(m.start() - at);
}
Candidate::PossibleStartOfMatch(i) => {
prestate.update_skipped_bytes(i - at);
}
}
cand
}
/// If the given byte is an ASCII letter, then return it in the opposite case.
/// e.g., Given `b'A'`, this returns `b'a'`, and given `b'a'`, this returns
/// `b'A'`. If the given byte is not an ASCII letter, it is returned unchanged.
pub fn opposite_ascii_case(b: u8) -> u8 {
if b'A' <= b && b <= b'Z' {
b.to_ascii_lowercase()
} else if b'a' <= b && b <= b'z' {
b.to_ascii_uppercase()
} else {
b
}
}
/// Return the frequency rank of the given byte. The higher the rank, the more
/// common the byte (heuristically speaking).
fn freq_rank(b: u8) -> u8 {
use byte_frequencies::BYTE_FREQUENCIES;
BYTE_FREQUENCIES[b as usize]
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn scratch() {
let mut b = Builder::new(MatchKind::LeftmostFirst);
b.add(b"Sherlock");
b.add(b"locjaw");
// b.add(b"Sherlock");
// b.add(b"Holmes");
// b.add(b"Watson");
// b.add("Шерлок Холмс".as_bytes());
// b.add("Джон Уотсон".as_bytes());
let s = b.build().unwrap();
println!("{:?}", s);
}
}

View File

@ -0,0 +1,203 @@
use std::fmt::Debug;
use std::hash::Hash;
use error::{Error, Result};
// NOTE: Most of this code was copied from regex-automata, but without the
// (de)serialization specific stuff.
/// Check that the premultiplication of the given state identifier can
/// fit into the representation indicated by `S`. If it cannot, or if it
/// overflows `usize` itself, then an error is returned.
pub fn premultiply_overflow_error<S: StateID>(
last_state: S,
alphabet_len: usize,
) -> Result<()> {
let requested = match last_state.to_usize().checked_mul(alphabet_len) {
Some(requested) => requested,
None => return Err(Error::premultiply_overflow(0, 0)),
};
if requested > S::max_id() {
return Err(Error::premultiply_overflow(S::max_id(), requested));
}
Ok(())
}
/// Convert the given `usize` to the chosen state identifier
/// representation. If the given value cannot fit in the chosen
/// representation, then an error is returned.
pub fn usize_to_state_id<S: StateID>(value: usize) -> Result<S> {
if value > S::max_id() {
Err(Error::state_id_overflow(S::max_id()))
} else {
Ok(S::from_usize(value))
}
}
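// Illustrative sketch, not part of the vendored upstream source: how the
// conversion helper above reports overflow. A value that fits in the chosen
// representation converts cleanly, while a value above `u8::MAX` yields an
// error. The module and test names are invented for this example.
#[cfg(test)]
mod state_id_sketch {
    use super::usize_to_state_id;

    #[test]
    fn small_ids_fit_and_large_ids_error() {
        assert_eq!(usize_to_state_id::<u8>(200).ok(), Some(200u8));
        assert!(usize_to_state_id::<u8>(300).is_err());
    }
}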
/// Return the unique identifier for an automaton's fail state in the chosen
/// representation indicated by `S`.
pub fn fail_id<S: StateID>() -> S {
S::from_usize(0)
}
/// Return the unique identifier for an automaton's dead state in the chosen
/// representation indicated by `S`.
pub fn dead_id<S: StateID>() -> S {
S::from_usize(1)
}
mod private {
/// Sealed stops crates other than aho-corasick from implementing any
/// traits that use it.
pub trait Sealed {}
impl Sealed for u8 {}
impl Sealed for u16 {}
impl Sealed for u32 {}
impl Sealed for u64 {}
impl Sealed for usize {}
}
/// A trait describing the representation of an automaton's state identifier.
///
/// The purpose of this trait is to safely express both the possible state
/// identifier representations that can be used in an automaton and to convert
/// between state identifier representations and types that can be used to
/// efficiently index memory (such as `usize`).
///
/// In general, one should not need to implement this trait explicitly. Indeed,
/// for now, this trait is sealed such that it cannot be implemented by any
/// other type. In particular, this crate provides implementations for `u8`,
/// `u16`, `u32`, `u64` and `usize`. (`u32` and `u64` are only provided for
/// targets that can represent all corresponding values in a `usize`.)
///
/// # Safety
///
/// This trait is unsafe because the correctness of its implementations may be
/// relied upon by other unsafe code. For example, one possible way to
/// implement this trait incorrectly would be to return a maximum identifier
/// in `max_id` that is greater than the real maximum identifier. This will
/// likely result in wrap-on-overflow semantics in release mode, which can in
/// turn produce incorrect state identifiers. Those state identifiers may then
/// in turn access out-of-bounds memory in an automaton's search routine, where
/// bounds checks are explicitly elided for performance reasons.
pub unsafe trait StateID:
private::Sealed
+ Clone
+ Copy
+ Debug
+ Eq
+ Hash
+ PartialEq
+ PartialOrd
+ Ord
{
/// Convert from a `usize` to this implementation's representation.
///
/// Implementors may assume that `n <= Self::max_id`. That is, implementors
/// do not need to check whether `n` can fit inside this implementation's
/// representation.
fn from_usize(n: usize) -> Self;
/// Convert this implementation's representation to a `usize`.
///
/// Implementors must not return a `usize` value greater than
/// `Self::max_id` and must not permit overflow when converting between the
/// implementor's representation and `usize`. In general, the preferred
/// way for implementors to achieve this is to simply not provide
/// implementations of `StateID` that cannot fit into the target platform's
/// `usize`.
fn to_usize(self) -> usize;
/// Return the maximum state identifier supported by this representation.
///
/// Implementors must return a correct bound. Doing otherwise may result
/// in memory unsafety.
fn max_id() -> usize;
}
unsafe impl StateID for usize {
#[inline]
fn from_usize(n: usize) -> usize {
n
}
#[inline]
fn to_usize(self) -> usize {
self
}
#[inline]
fn max_id() -> usize {
::std::usize::MAX
}
}
unsafe impl StateID for u8 {
#[inline]
fn from_usize(n: usize) -> u8 {
n as u8
}
#[inline]
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize {
::std::u8::MAX as usize
}
}
unsafe impl StateID for u16 {
#[inline]
fn from_usize(n: usize) -> u16 {
n as u16
}
#[inline]
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize {
::std::u16::MAX as usize
}
}
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
unsafe impl StateID for u32 {
#[inline]
fn from_usize(n: usize) -> u32 {
n as u32
}
#[inline]
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize {
::std::u32::MAX as usize
}
}
#[cfg(target_pointer_width = "64")]
unsafe impl StateID for u64 {
#[inline]
fn from_usize(n: usize) -> u64 {
n as u64
}
#[inline]
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize {
::std::u64::MAX as usize
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1 @@
{"files":{"CHANGELOG.md":"4d03cdc2987a1fa1b86a2de5fa57714d83cbb9d3d3f400eadecd8e8a0a857621","Cargo.toml":"a9ec8b36707f907971b410719b85e9594cb96c9e4bca6f831e2cc78ba22c71da","LICENSE":"0dd39f89842df915b8ded7ac59e8a1372cf5be36133818866cca2ef3af1a2849","README.md":"132383b73044b1e91acb9e5d69afeb8f14239cfe712aca59152bfe0c420f7a33","examples/test.rs":"4e9e73dfe80573296e93f66c2c03681908c278a758dceb4913ecb65d20e9ed86","src/lib.rs":"7a0e852a4bbfbf72c7702527d7c6f7f8c717fca77bfd4b3e78ba7f6cebed4e6f","src/line.rs":"edbdc54503342733f8aa7a4aa72a7cb08d376d53ca2b85e00a77dd42bf04bb22","src/shapes/mod.rs":"071d6ea4080dc8f1e4299258d65c32bccc40e9eb6933f3b3600576d58e7917ae","src/shapes/rectangle.rs":"ad545b9d4a628b3a515deb9b087f881b253d3f3a16a60734da82896d51c93cc9","src/text/fontconfig.rs":"c673bfcf5df387479dd2027a733d8de85461731b448202f49a9f2d1bce54f465","src/text/mod.rs":"4afd25c6297d55cd5a3956e5ae6d3921403b306533a237fe2e5eab33e65a91ee"},"package":"9b7f09f89872c2b6b29e319377b1fbe91c6f5947df19a25596e121cf19a7b35e"}

View File

@ -0,0 +1,49 @@
"""
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # "MIT"
])
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_library",
"rust_binary",
"rust_test",
)
rust_library(
name = "andrew",
crate_root = "src/lib.rs",
crate_type = "lib",
edition = "2015",
srcs = glob(["**/*.rs"]),
deps = [
"//third_party/cargo/vendor/bitflags-1.2.1:bitflags",
"//third_party/cargo/vendor/line_drawing-0.7.0:line_drawing",
"//third_party/cargo/vendor/walkdir-2.3.1:walkdir",
"//third_party/cargo/vendor/xdg-2.2.0:xdg",
"//third_party/cargo/vendor/xml-rs-0.8.0:xml_rs",
"//third_party/cargo/vendor/rusttype-0.8.2:rusttype",
],
rustc_flags = [
"--cap-lints=allow",
],
version = "0.2.1",
crate_features = [
],
)
# Unsupported target "test" with type "example" omitted

View File

@ -0,0 +1,47 @@
# Change Log
## Unreleased
## 0.2.1 -- 2019-03-29
- Fix `get_width()` for texts that start and end with spaces
## 0.2.0 -- 2019-01-26
- **[Breaking]** Canvas is now endian aware and will draw to the buffer in the endianness of the `Endian` it's created with
## 0.1.6 -- 2019-01-24
- Faster drawing of horizontal and vertical lines by precomputing line boundaries
- Only calculate alpha overlay when drawing colors with a non-max alpha value, for performance
## 0.1.5 -- 2019-01-13
- Fix drawing of characters with negative bounding boxes
- Fix error in `get_width()` for text without any characters
## 0.1.4 -- 2018-11-10
- Remove rusttype version restriction
## 0.1.3 -- 2018-10-09
- Move from `quick-xml` to `xml-rs` dependency
## 0.1.2 -- 2018-10-04
- Add basic/experimental support for fontconfig in `andrew::text::fontconfig`
## 0.1.1 -- 2018-09-17
- Manage dependencies to maintain rust 1.22 compatibility
- Update rusttype to 0.7.1
## 0.1.0 -- 2018-08-17
Initial version, including:
- canvas
- lines
- rectangles
- text

View File

@ -0,0 +1,41 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g. crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "andrew"
version = "0.2.1"
authors = ["Lucas Timmins <timmins.s.lucas@gmail.com>"]
description = "The andrew crate provides convenient drawing of objects such as shapes, lines and text to buffers"
readme = "README.md"
keywords = ["draw", "buffer", "shapes", "lines", "pixels"]
categories = ["rendering", "multimedia::images"]
license = "MIT"
repository = "https://github.com/trimental/andrew"
[dependencies.bitflags]
version = "1.0.3"
[dependencies.line_drawing]
version = "0.7.0"
[dependencies.rusttype]
version = "0.7.1"
[dependencies.walkdir]
version = "2.0"
[dependencies.xdg]
version = "2.1.0"
[dependencies.xml-rs]
version = "0.8.0"
[dev-dependencies.smithay-client-toolkit]
version = "0.4.0"

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Lucas Timmins
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,3 @@
# Andrew
This crate provides convenient drawing of objects such as shapes, lines and text to buffers
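A minimal usage sketch (buffer size, coordinates and colors are illustrative; the `Canvas`, `Rectangle` and `Line` APIs are the ones vendored under `src/` below):

```rust
extern crate andrew;

use andrew::line::Line;
use andrew::shapes::rectangle::{Rectangle, Sides};
use andrew::{Canvas, Endian};

fn main() {
    // 4 bytes per pixel, so the stride is width * 4 and colors are [alpha, r, g, b].
    let (width, height) = (64usize, 64usize);
    let mut buf = vec![0u8; width * height * 4];
    let mut canvas = Canvas::new(&mut buf, width, height, width * 4, Endian::native());

    // A filled rectangle with a 2 px opaque white border on all sides.
    let rect = Rectangle::new(
        (8, 8),
        (48, 32),
        Some((2, [255, 255, 255, 255], Sides::ALL, None)),
        Some([255, 170, 20, 45]),
    );
    canvas.draw(&rect);

    // An antialiased diagonal line across the canvas.
    canvas.draw(&Line::new((0, 0), (63, 63), [255, 0, 0, 0], true));
}
```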

View File

@ -0,0 +1,210 @@
extern crate andrew;
extern crate smithay_client_toolkit as sctk;
use std::io::{Read, Seek, SeekFrom, Write};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use sctk::utils::{DoubleMemPool, MemPool};
use sctk::window::{ConceptFrame, Event as WEvent, Window};
use sctk::Environment;
use sctk::reexports::client::protocol::wl_compositor::RequestsTrait as CompositorRequests;
use sctk::reexports::client::protocol::wl_surface::RequestsTrait as SurfaceRequests;
use sctk::reexports::client::protocol::{wl_shm, wl_surface};
use sctk::reexports::client::{Display, Proxy};
use andrew::shapes::rectangle;
use andrew::text;
use andrew::text::fontconfig;
fn main() {
let (display, mut event_queue) =
Display::connect_to_env().expect("Failed to connect to the wayland server.");
let env = Environment::from_display(&*display, &mut event_queue).unwrap();
let seat = env
.manager
.instantiate_auto(|seat| seat.implement(|_, _| {}, ()))
.unwrap();
let mut dimensions = (600, 400);
let surface = env
.compositor
.create_surface(|surface| surface.implement(|_, _| {}, ()))
.unwrap();
let next_action = Arc::new(Mutex::new(None::<WEvent>));
let waction = next_action.clone();
let mut window = Window::<ConceptFrame>::init_from_env(&env, surface, dimensions, move |evt| {
let mut next_action = waction.lock().unwrap();
// Keep last event in priority order : Close > Configure > Refresh
let replace = match (&evt, &*next_action) {
(_, &None)
| (_, &Some(WEvent::Refresh))
| (&WEvent::Configure { .. }, &Some(WEvent::Configure { .. }))
| (&WEvent::Close, _) => true,
_ => false,
};
if replace {
*next_action = Some(evt);
}
})
.expect("Failed to create a window !");
window.new_seat(&seat);
let mut pools = DoubleMemPool::new(&env.shm, || {}).expect("Failed to create a memory pool !");
let mut font_data = Vec::new();
::std::fs::File::open(
&fontconfig::FontConfig::new()
.unwrap()
.get_regular_family_fonts("sans")
.unwrap()[0],
)
.unwrap()
.read_to_end(&mut font_data)
.unwrap();
if !env.shell.needs_configure() {
if let Some(pool) = pools.pool() {
redraw(pool, window.surface(), dimensions, &font_data);
}
window.refresh();
}
loop {
match next_action.lock().unwrap().take() {
Some(WEvent::Close) => break,
Some(WEvent::Refresh) => {
window.refresh();
window.surface().commit();
}
Some(WEvent::Configure { new_size, .. }) => {
if let Some((w, h)) = new_size {
window.resize(w, h);
dimensions = (w, h)
}
window.refresh();
if let Some(pool) = pools.pool() {
redraw(pool, window.surface(), dimensions, &font_data);
}
}
None => {}
}
display.flush().unwrap();
event_queue.dispatch().unwrap();
}
}
fn redraw(
pool: &mut MemPool,
surface: &Proxy<wl_surface::WlSurface>,
dimensions: (u32, u32),
font_data: &[u8],
) {
let (buf_x, buf_y) = (dimensions.0 as usize, dimensions.1 as usize);
pool.resize(4 * buf_x * buf_y)
.expect("Failed to resize the memory pool.");
let mut buf: Vec<u8> = vec![255; 4 * buf_x * buf_y];
let mut canvas =
andrew::Canvas::new(&mut buf, buf_x, buf_y, 4 * buf_x, andrew::Endian::native());
println!("______________");
let mut total_dur = Duration::new(0, 0);
// Draw background
let (block_w, block_h) = (buf_x / 20, buf_y / 20);
for block_y in 0..21 {
for block_x in 0..21 {
let color = if (block_x + (block_y % 2)) % 2 == 0 {
[255, 0, 0, 0]
} else {
[255, 255, 255, 255]
};
let block = rectangle::Rectangle::new(
(block_w * block_x, block_h * block_y),
(block_w, block_h),
None,
Some(color),
);
let timer = Instant::now();
canvas.draw(&block);
total_dur += timer.elapsed()
}
}
println!("Background draw time: {:?}", total_dur);
let rectangle = rectangle::Rectangle::new(
(buf_x / 30, buf_y / 4),
(buf_x - (buf_x / 30) * 2, buf_y - buf_y / 2),
Some((
15,
[255, 170, 20, 45],
rectangle::Sides::TOP ^ rectangle::Sides::BOTTOM,
Some(10),
)),
Some([255, 170, 20, 45]),
);
let mut timer = Instant::now();
canvas.draw(&rectangle);
println!("Rectangle draw time: {:?}", timer.elapsed());
total_dur += timer.elapsed();
let text_h = buf_x as f32 / 80.;
let text_hh = text_h / 2.;
let mut text = text::Text::new(
(63, 69),
[255, 255, 255, 255],
font_data,
text_h,
2.0,
"“Life is the art of drawing without an eraser.” - John W. Gardner",
);
text.pos = (
buf_x / 2 - text.get_width() / 2,
buf_y / 2 - text_hh as usize,
);
let text_box = rectangle::Rectangle::new(
(
buf_x / 2 - text.get_width() / 2 - 10,
buf_y / 2 - text_hh as usize - 10,
),
(text.get_width() + 20, text_h as usize + 20),
Some((3, [255, 255, 255, 255], rectangle::Sides::ALL, Some(5))),
None,
);
timer = Instant::now();
canvas.draw(&text_box);
println!("Text box draw time: {:?}", timer.elapsed());
total_dur += timer.elapsed();
timer = Instant::now();
canvas.draw(&text);
println!("Text draw time: {:?}", timer.elapsed());
total_dur += timer.elapsed();
println!("Total draw time: {:?}", total_dur);
pool.seek(SeekFrom::Start(0)).unwrap();
pool.write_all(canvas.buffer).unwrap();
pool.flush().unwrap();
let new_buffer = pool.buffer(
0,
buf_x as i32,
buf_y as i32,
4 * buf_x as i32,
wl_shm::Format::Argb8888,
);
surface.attach(Some(&new_buffer), 0, 0);
surface.commit();
}

View File

@ -0,0 +1,134 @@
//! Andrew is a crate for drawing objects
#![warn(missing_docs)]
extern crate line_drawing;
extern crate rusttype;
extern crate walkdir;
extern crate xdg;
extern crate xml;
#[macro_use]
extern crate bitflags;
/// A module that contains functions and objects relating to lines
pub mod line;
/// A module that contains functions and objects relating to shapes
pub mod shapes;
/// A module that contains functions and objects relating to text
pub mod text;
/// The Drawable trait allows objects to be drawn to a buffer or canvas
pub trait Drawable {
/// A function that draws the object to a canvas
fn draw(&self, canvas: &mut Canvas);
}
/// Describes an endianness (aka byte order)
#[derive(Debug, PartialEq)]
pub enum Endian {
/// Little Endian
Little,
/// Big Endian
Big,
}
impl Endian {
/// Returns the native endianness
pub fn native() -> Endian {
if cfg!(target_endian = "little") {
Endian::Little
} else {
Endian::Big
}
}
}
/// The canvas object acts as a wrapper around a buffer, providing information and functions
/// for drawing
pub struct Canvas<'a> {
/// A buffer for the canvas to draw to
pub buffer: &'a mut [u8],
/// The width in pixels of the canvas
pub width: usize,
/// The height in pixels of the canvas
pub height: usize,
/// The number of bytes between each line of pixels on the canvas
pub stride: usize,
/// The number of bytes contained in each pixel
pub pixel_size: usize,
/// The endianness of the canvas
pub endianness: Endian,
}
impl<'a> Canvas<'a> {
/// Creates a new canvas object
pub fn new(
buffer: &'a mut [u8],
width: usize,
height: usize,
stride: usize,
endianness: Endian,
) -> Canvas<'a> {
assert!(
stride % width == 0,
"Incorrect Dimensions - Stride is not a multiple of width"
);
assert!(buffer.len() == stride * height);
let pixel_size = stride / width;
Canvas {
buffer,
width,
height,
stride,
pixel_size,
endianness,
}
}
/// Draws an object that implements the Drawable trait to the buffer
pub fn draw<D: Drawable>(&mut self, drawable: &D) {
drawable.draw(self);
}
/// Draws a pixel at the x and y coordinate
pub fn draw_point(&mut self, x: usize, y: usize, color: [u8; 4]) {
let base = self.stride * y + self.pixel_size * x;
if self.endianness == Endian::Little {
if color[0] == 255 {
self.buffer[base + 3] = color[0];
self.buffer[base + 2] = color[1];
self.buffer[base + 1] = color[2];
self.buffer[base] = color[3];
} else {
for c in 0..3 {
let alpha = f32::from(color[0]) / 255.0;
let color_diff =
(color[3 - c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
self.buffer[base + c] = new_color as u8;
}
self.buffer[base + 3] = 255 as u8;
}
} else if color[0] == 255 {
self.buffer[base] = color[0];
self.buffer[base + 1] = color[1];
self.buffer[base + 2] = color[2];
self.buffer[base + 3] = color[3];
} else {
for c in 1..4 {
let alpha = f32::from(color[0]) / 255.0;
let color_diff =
(color[c] as isize - self.buffer[base + c] as isize) as f32 * alpha;
let new_color = (f32::from(self.buffer[base + c]) + color_diff) as u8;
self.buffer[base + c] = new_color as u8;
}
self.buffer[base] = 255 as u8;
}
}
/// Clears the entire canvas buffer by zeroing it
pub fn clear(&mut self) {
for i in 0..self.width * self.height * 4 {
self.buffer[i] = 0x00;
}
}
}

View File

@ -0,0 +1,84 @@
use std::cmp::min;
use line_drawing::Bresenham;
use line_drawing::XiaolinWu;
use Canvas;
use Drawable;
/// A drawable object that represents a line
pub struct Line {
/// The first point of the line
pub pt1: (usize, usize),
/// The second point of the line
pub pt2: (usize, usize),
/// The color of the line
pub color: [u8; 4],
/// Decides whether the line will be antialiased
pub antialiased: bool,
}
impl Line {
/// Creates a new Line object
pub fn new(
pt1: (usize, usize),
pt2: (usize, usize),
color: [u8; 4],
antialiased: bool,
) -> Line {
Line {
pt1,
pt2,
color,
antialiased,
}
}
}
impl Drawable for Line {
fn draw(&self, canvas: &mut Canvas) {
if !self.antialiased {
if self.pt1.0 == self.pt2.0 && self.pt1.0 < canvas.width {
let (min_y, max_y) = if self.pt1.1 > self.pt2.1 {
(self.pt2.1, self.pt1.1)
} else {
(self.pt1.1, self.pt2.1)
};
for y in min_y..min(max_y, canvas.height - 1) + 1 {
canvas.draw_point(self.pt1.0, y, self.color)
}
} else if self.pt1.1 == self.pt2.1 && self.pt1.1 < canvas.height {
let (min_x, max_x) = if self.pt1.0 > self.pt2.0 {
(self.pt2.0, self.pt1.0)
} else {
(self.pt1.0, self.pt2.0)
};
for x in min_x..min(max_x, canvas.width - 1) + 1 {
canvas.draw_point(x, self.pt1.1, self.color)
}
} else {
// Angled line without antialias
for (x, y) in Bresenham::new(
(self.pt1.0 as isize, self.pt1.1 as isize),
(self.pt2.0 as isize, self.pt2.1 as isize),
) {
if x < canvas.width as isize && y < canvas.height as isize {
canvas.draw_point(x as usize, y as usize, self.color)
}
}
}
} else {
// Angled line with antialias
for ((x, y), coverage) in XiaolinWu::<f32, isize>::new(
(self.pt1.0 as f32, self.pt1.1 as f32),
(self.pt2.0 as f32, self.pt2.1 as f32),
) {
if x < canvas.width as isize && y < canvas.height as isize {
let mut color = self.color;
color[3] = (f32::from(color[3]) * coverage) as u8;
canvas.draw_point(x as usize, y as usize, color)
}
}
}
}
}

View File

@ -0,0 +1,2 @@
/// A module that contains functions and objects relating to rectangles
pub mod rectangle;

View File

@ -0,0 +1,153 @@
use line::Line;
use Canvas;
use Drawable;
bitflags! {
/// The Sides bitflag represents the sides of a rectangle
pub struct Sides: u32 {
/// The top side of the rectangle
const TOP = 0b0001;
/// The bottom side of the rectangle
const BOTTOM = 0b0010;
/// The left side of the rectangle
const LEFT = 0b0100;
/// The right side of the rectangle
const RIGHT = 0b1000;
/// All sides of the rectangle
const ALL = Self::TOP.bits | Self::BOTTOM.bits | Self::LEFT.bits | Self::RIGHT.bits;
}
}
/// A drawable object that represents a rectangle
pub struct Rectangle {
/// Position of the top-left corner of rectangle
pub pos: (usize, usize),
/// The size of the rectangle to be drawn; the border will be contained within this size
pub size: (usize, usize),
/// The border that is drawn around the perimeter of the rectangle. Its arguments are
/// thickness of border, color of border, sides that the border is drawn around, rounding size
/// of the corners
pub border: Option<(usize, [u8; 4], Sides, Option<usize>)>,
/// The color of the fill (area) of the rectangle
pub fill: Option<[u8; 4]>,
}
impl Rectangle {
/// Creates a new Rectangle object
pub fn new(
pos: (usize, usize),
size: (usize, usize),
border: Option<(usize, [u8; 4], Sides, Option<usize>)>,
fill: Option<[u8; 4]>,
) -> Rectangle {
Rectangle {
pos,
size,
border,
fill,
}
}
fn draw_borders(&self, canvas: &mut Canvas) {
if let Some(border) = self.border {
for i in 0..border.0 {
let rounding_space = if let Some(round_size) = border.3 {
if i < round_size {
round_size
- ((round_size as f32).powi(2) - ((round_size - i - 1) as f32).powi(2))
.sqrt()
.round() as usize
} else {
0
}
} else {
0
};
// Top line
if border.2.contains(Sides::TOP) && canvas.width > rounding_space * 2 {
Line::new(
(self.pos.0 + rounding_space, self.pos.1 + i),
(self.pos.0 + self.size.0 - rounding_space, self.pos.1 + i),
border.1,
false,
)
.draw(canvas);
}
// Bottom line
if border.2.contains(Sides::BOTTOM) && canvas.width > rounding_space * 2 {
Line::new(
(self.pos.0 + rounding_space, self.pos.1 + self.size.1 - i),
(
self.pos.0 + self.size.0 - rounding_space,
self.pos.1 + self.size.1 - i,
),
border.1,
false,
)
.draw(canvas);
}
// Left line
if border.2.contains(Sides::LEFT) && canvas.height > rounding_space * 2 {
Line::new(
(self.pos.0 + i, self.pos.1 + rounding_space),
(self.pos.0 + i, self.pos.1 + self.size.1 - rounding_space),
border.1,
false,
)
.draw(canvas);
}
// Right line
if border.2.contains(Sides::RIGHT) && canvas.height > rounding_space * 2 {
Line::new(
(self.pos.0 + self.size.0 - i, self.pos.1 + rounding_space),
(
self.pos.0 + self.size.0 - i,
self.pos.1 + self.size.1 - rounding_space,
),
border.1,
false,
)
.draw(canvas);
}
}
}
}
fn draw_area(&self, canvas: &mut Canvas) {
if let Some(fill) = self.fill {
let (area_pos, area_size) = self.measure_area();
for y in area_pos.1..area_pos.1 + area_size.1 + 1 {
Line::new((area_pos.0, y), (area_pos.0 + area_size.0, y), fill, false).draw(canvas)
}
}
}
fn measure_area(&self) -> ((usize, usize), (usize, usize)) {
let (mut area_pos, mut area_size) = (self.pos, self.size);
if let Some(border) = self.border {
if border.2.contains(Sides::TOP) {
area_pos.1 += border.0;
area_size.1 -= border.0;
}
if border.2.contains(Sides::BOTTOM) {
area_size.1 -= border.0;
}
if border.2.contains(Sides::LEFT) {
area_pos.0 += border.0;
area_size.0 -= border.0;
}
if border.2.contains(Sides::RIGHT) {
area_size.0 -= border.0;
}
}
(area_pos, area_size)
}
}
impl Drawable for Rectangle {
fn draw(&self, canvas: &mut Canvas) {
self.draw_borders(canvas);
self.draw_area(canvas);
}
}

View File

@ -0,0 +1,162 @@
use std::fs::File;
use std::io::prelude::*;
use std::path::{Path, PathBuf};
use xdg::BaseDirectories;
use xml::reader::{EventReader, XmlEvent};
use walkdir::WalkDir;
/// Locates fontconfig config
fn get_config() -> Option<PathBuf> {
let xdg_dirs = BaseDirectories::with_prefix("fontconfig").unwrap();
xdg_dirs.find_config_file("fonts.conf").or_else(|| {
let config = Path::new("/etc/fonts/fonts.conf");
if config.exists() {
Some(config.into())
} else {
None
}
})
}
fn parse_config(path: &Path) -> Vec<(Vec<String>, String)> {
let config_file = File::open(path).unwrap();
let parser = EventReader::new(config_file);
let mut tracking_tags: Vec<String> = Vec::new();
let mut xml_data: Vec<(Vec<String>, String)> = Vec::new();
for e in parser {
match e {
Ok(XmlEvent::StartElement { name, .. }) => {
tracking_tags.push(name.to_string());
}
Ok(XmlEvent::CData(data)) => {
xml_data.push((tracking_tags.clone(), data));
}
Ok(XmlEvent::Characters(data)) => {
xml_data.push((tracking_tags.clone(), data));
}
Ok(XmlEvent::EndElement { .. }) => {
tracking_tags.pop();
}
Err(e) => panic!(e),
_ => {}
}
}
xml_data
}
/// Represents the main fontconfig config file
pub struct FontConfig {
location: PathBuf,
data: Vec<(Vec<String>, String)>,
}
impl FontConfig {
/// Creates a new FontConfig object by looking for the fontconfig config file
pub fn new() -> Result<FontConfig, ()> {
let location = get_config().ok_or(())?;
let data = parse_config(&location);
Ok(FontConfig {
location: location.to_path_buf(),
data,
})
}
/// Returns the location of the fontconfig config file being used
pub fn get_location(&self) -> &Path {
&self.location
}
/// Get the directories that contain fonts
pub fn get_font_dirs(&self) -> Vec<PathBuf> {
let mut dirs = Vec::new();
for entry in &self.data {
if entry.0.last() == Some(&"dir".to_string()) {
let path = PathBuf::from(entry.1.clone());
if path.exists() {
dirs.push(path);
}
}
}
dirs
}
/// Return all fonts installed on the system
pub fn get_fonts(&self) -> Result<Vec<PathBuf>, ::std::io::Error> {
let mut fonts = Vec::new();
for dir in self.get_font_dirs() {
for file in WalkDir::new(dir)
.into_iter()
.filter_map(|e| e.ok())
.filter(|p| p.file_type().is_file())
{
let path = file.into_path();
if let Some(extension) = path.extension() {
match extension.to_str() {
Some("ttf") | Some("otf") => fonts.push(path.clone()),
_ => {}
}
}
}
}
Ok(fonts)
}
/// Return all 'fonts.dir' files in font directories
pub fn get_font_dir_files(&self) -> Result<Vec<PathBuf>, ::std::io::Error> {
let mut fonts = Vec::new();
for dir in self.get_font_dirs() {
for file in WalkDir::new(dir)
.into_iter()
.filter_map(|e| e.ok())
.filter(|p| p.file_type().is_file())
{
let path = file.into_path();
if let Some(file_name) = path.clone().file_name() {
if file_name.to_str() == Some("fonts.dir") {
fonts.push(path);
}
}
}
}
Ok(fonts)
}
/// Returns the paths of regular fonts belonging to a specific family installed on the system
pub fn get_regular_family_fonts(&self, family: &str) -> Result<Vec<PathBuf>, ::std::io::Error> {
let fonts_dir_files = self.get_font_dir_files()?;
let mut fonts: Vec<PathBuf> = Vec::new();
for dir in fonts_dir_files {
let mut file = ::std::fs::File::open(dir.clone()).unwrap();
let mut buf = String::new();
file.read_to_string(&mut buf)?;
for line in buf.lines().filter(|l| l.find("medium-r-normal").is_some()) {
if let Some(split) = line.find(' ') {
let name = line[..split].to_string();
let settings = line[split..].to_string();
let mut char_buf = String::new();
for c in settings.chars() {
if c == ' ' || c == '-' {
char_buf.clear()
} else {
char_buf.push(c);
if char_buf == family {
let path = dir.with_file_name(name);
if !fonts.contains(&path) {
fonts.push(path);
}
break;
}
}
}
}
}
}
Ok(fonts)
}
}

View File

@ -0,0 +1,121 @@
/// A module that contains functions and objects relating to fontconfig
pub mod fontconfig;
use rusttype::{point, Font, Scale, SharedBytes, VMetrics};
use std::fs::File;
use std::io::Read;
use std::path::PathBuf;
use Canvas;
use Drawable;
/// A drawable object that represents text
pub struct Text<'a> {
/// The position of the text on the canvas
pub pos: (usize, usize),
/// The color of the text
pub color: [u8; 4],
/// The text that is rendered to the canvas on draw
pub text: String,
/// The font used in rendering the text
pub font: Font<'a>,
/// The scale that is applied to the text
pub scale: Scale,
/// The vertical metrics of the text
pub v_metrics: VMetrics,
}
/// Loads a font file into a `Vec<u8>`
pub fn load_font_file<P: Into<PathBuf>>(path: P) -> Vec<u8> {
let mut data: Vec<u8> = Vec::new();
let mut file = File::open(path.into()).expect("Could not open font file");
file.read_to_end(&mut data)
.expect("Could not read font file");
data
}
impl<'a> Text<'a> {
/// Creates a new Text object
pub fn new<P: Into<SharedBytes<'a>>, T: Into<String>>(
pos: (usize, usize),
color: [u8; 4],
font_data: P,
height: f32,
width_scale: f32,
text: T,
) -> Text<'a> {
let text = text.into();
// Create font
let font = Font::from_bytes(font_data).expect("Error constructing Font");
// Create scale
let scale = Scale {
x: height * width_scale,
y: height,
};
// Create needed metrics
let v_metrics = font.v_metrics(scale);
Text {
pos,
color,
text: text.clone(),
scale,
v_metrics,
font,
}
}
fn draw_text(&self, canvas: &mut Canvas) {
let glyphs: Vec<_> = self
.font
.layout(&self.text, self.scale, point(0.0, self.v_metrics.ascent))
.collect();
for glyph in glyphs {
if let Some(bounding_box) = glyph.pixel_bounding_box() {
glyph.draw(|x, y, v| {
let x = ((x as usize + self.pos.0) as i32 + bounding_box.min.x) as usize;
let y = ((y as usize + self.pos.1) as i32 + bounding_box.min.y) as usize;
if x < canvas.width && y < canvas.height {
let mut color = self.color;
color[0] = (f32::from(color[0]) * v) as u8;
canvas.draw_point(x, y, color);
}
});
}
}
}
/// Calculates the width in pixels of the text
pub fn get_width(&self) -> usize {
let glyphs: Vec<_> = self
.font
.layout(&self.text, self.scale, point(0.0, self.v_metrics.ascent))
.collect();
let min_x = glyphs
.first()
.map(|g| {
if let Some(bb) = g.pixel_bounding_box() {
bb.min.x
} else {
g.position().x as i32
}
})
.unwrap_or(0);
let max_x = glyphs
.last()
.map(|g| {
if let Some(bb) = g.pixel_bounding_box() {
bb.max.x
} else {
(g.position().x + g.unpositioned().h_metrics().advance_width) as i32
}
})
.unwrap_or(0);
(max_x - min_x) as usize
}
}
impl<'a> Drawable for Text<'a> {
fn draw(&self, canvas: &mut Canvas) {
self.draw_text(canvas);
}
}

View File

@ -0,0 +1 @@
{"files":{"Cargo.toml":"dbe01d8dfe2ea291ca94d56acfe3a401bbaf81d960be2d2afc5e916f755a9ab7","src/lib.rs":"d364185fd66b549a70b935fdfec041b55a10cdc0dd901fd95b38554f08cf0923"},"package":"000444226fcff248f2bc4c7625be32c63caccfecc2723a2b9f78a7487a49c407"}

View File

@ -0,0 +1,42 @@
"""
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # "MIT"
])
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_library",
"rust_binary",
"rust_test",
)
rust_library(
name = "android_glue",
crate_root = "src/lib.rs",
crate_type = "lib",
edition = "2015",
srcs = glob(["**/*.rs"]),
deps = [
],
rustc_flags = [
"--cap-lints=allow",
],
version = "0.2.3",
crate_features = [
],
)

View File

@ -0,0 +1,8 @@
[package]
name = "android_glue"
version = "0.2.3"
authors = ["Pierre Krieger <pierre.krieger1708@gmail.com>"]
license = "MIT"
description = "Glue for the Android JNI"
repository = "https://github.com/tomaka/android-rs-glue"

View File

@ -0,0 +1,152 @@
#![cfg(target_os = "android")]
extern {
fn cargo_apk_injected_glue_get_native_window() -> *const c_void;
fn cargo_apk_injected_glue_add_sender(sender: *mut ());
fn cargo_apk_injected_glue_add_sender_missing(sender: *mut ());
fn cargo_apk_injected_glue_add_sync_event_handler(sender: *mut ());
fn cargo_apk_injected_glue_remove_sync_event_handler(sender: *mut ());
fn cargo_apk_injected_glue_set_multitouch(multitouch: bool);
fn cargo_apk_injected_glue_write_log(ptr: *const (), len: usize);
fn cargo_apk_injected_glue_load_asset(ptr: *const (), len: usize) -> *mut c_void;
fn cargo_apk_injected_glue_wake_event_loop();
}
use std::mem;
use std::os::raw::c_void;
use std::sync::mpsc::Sender;
/// An event triggered by the Android environment.
#[derive(Clone, Copy, Debug)]
pub enum Event {
EventMotion(Motion),
EventKeyUp,
EventKeyDown,
InitWindow,
SaveState,
TermWindow,
GainedFocus,
LostFocus,
InputChanged,
WindowResized,
WindowRedrawNeeded,
ContentRectChanged,
ConfigChanged,
LowMemory,
Start,
Resume,
Pause,
Stop,
Destroy,
Wake
}
/// Data about a motion event.
#[derive(Clone, Copy, Debug)]
pub struct Motion {
pub action: MotionAction,
pub pointer_id: i32,
pub x: f32,
pub y: f32,
}
/// The type of pointer action in a motion event.
#[derive(Clone, Copy, Debug)]
pub enum MotionAction {
Down,
Move,
Up,
Cancel,
}
pub enum AssetError {
AssetMissing,
EmptyBuffer,
}
// Trait used to dispatch sync events from the polling loop thread.
pub trait SyncEventHandler {
fn handle(&mut self, event: &Event);
}
/// Adds a sender where events will be sent to.
#[inline]
pub fn add_sender(sender: Sender<Event>) {
unsafe {
let sender = Box::into_raw(Box::new(sender)) as *mut _;
cargo_apk_injected_glue_add_sender(sender);
}
}
/// Adds a SyncEventHandler which will receive sync events from the polling loop.
#[inline]
pub fn add_sync_event_handler(handler: Box<SyncEventHandler>) {
unsafe {
let handler = Box::into_raw(Box::new(handler)) as *mut _;
cargo_apk_injected_glue_add_sync_event_handler(handler);
}
}
/// Removes a SyncEventHandler.
#[inline]
pub fn remove_sync_event_handler(handler: *const SyncEventHandler) {
unsafe {
let handler = Box::into_raw(Box::new(handler)) as *mut _;
cargo_apk_injected_glue_remove_sync_event_handler(handler);
}
}
#[inline]
pub fn set_multitouch(multitouch: bool) {
unsafe {
cargo_apk_injected_glue_set_multitouch(multitouch);
}
}
/// Adds a sender where events will be sent to, but also sends
/// any missing events to the sender object.
///
/// The missing events happen when the application starts, but before
/// any senders are registered. Since these might be important to certain
/// applications, this function provides that support.
#[inline]
pub fn add_sender_missing(sender: Sender<Event>) {
unsafe {
let sender = Box::into_raw(Box::new(sender)) as *mut _;
cargo_apk_injected_glue_add_sender_missing(sender);
}
}
/// Returns a handle to the native window.
#[inline]
pub unsafe fn get_native_window() -> *const c_void {
cargo_apk_injected_glue_get_native_window()
}
/// Writes a message to the Android log.
#[inline]
pub fn write_log(message: &str) {
unsafe {
let (message_ptr, message_len) = mem::transmute(message);
cargo_apk_injected_glue_write_log(message_ptr, message_len);
}
}
#[inline]
pub fn load_asset(filename: &str) -> Result<Vec<u8>, AssetError> {
unsafe {
let (filename_ptr, filename_len) = mem::transmute(filename);
let data = cargo_apk_injected_glue_load_asset(filename_ptr, filename_len);
let data: Box<Result<Vec<u8>, AssetError>> = Box::from_raw(data as *mut _);
*data
}
}
// Wakes the event poll asynchronously and sends an `Event::Wake` event to the senders.
// This method can be called on any thread. This method returns immediately.
#[inline]
pub fn wake_event_loop() {
unsafe {
cargo_apk_injected_glue_wake_event_loop();
}
}
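// A minimal usage sketch, assuming an Android target built through cargo-apk
// (the injected glue symbols above only exist there); `example_event_loop` is
// an illustrative name and not part of the crate's API.
pub fn example_event_loop() {
    use std::sync::mpsc::channel;

    let (tx, rx) = channel();
    // Deliver lifecycle and input events to this channel.
    add_sender(tx);
    // Report every pointer in motion events, not just the first one.
    set_multitouch(true);
    write_log("event loop starting");
    while let Ok(event) = rx.recv() {
        match event {
            // Stop when the window is torn down or the activity is destroyed.
            Event::TermWindow | Event::Destroy => break,
            other => write_log(&format!("got event: {:?}", other)),
        }
    }
}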

View File

@ -0,0 +1 @@
{"files":{"Cargo.toml":"bcd8107c8c85e92fb0b291014b8ba62bb1f076ec297028fd44e264765c303f2f","LICENSE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","README.md":"5e965438ec90b7ede0a5c93c482a91536759c147e215f7471d8534f121feb244","src/abs_diff_eq.rs":"b11c8128d1460ee9739abeedc49d8923fde2acb953481661f5adc94feef86761","src/lib.rs":"075aa97495af343ad128b59cdec0206eb06d30d153abe1c38226d9d0f55c0fc9","src/macros.rs":"a9b59cce0cdb3e154321e7ba6ed2d53c21eeb8a46840a761e36e0259b44a59c4","src/relative_eq.rs":"ff292f125853ff846adc29cfa260042bab5e329e7b47b732e863587b8946b9d2","src/ulps_eq.rs":"21ad336b9ed8762eb3b7b1dd1b47b001edc8497d85c8c385bf8b2964be5687e7","tests/abs_diff_eq.rs":"9df9b48842dbc61b04b0328b64f2ce48b6e66e9538178babd58a9e52adeaddaf","tests/macro_import.rs":"006d813c99217d251a30b2f248548e7ad385754f88f6d9838d1dee866ea4b7f4","tests/macros.rs":"697241db4db66633cb53324bb127c64fd3b544236055bfe1721abb64a1dfab23","tests/relative_eq.rs":"c114edb6af07a2ac126e167682dd5d677d5591217f48bfdba150f866dfe4fdaf","tests/ulps_eq.rs":"ef6d57b98394fc87e724e26de7a3461426444563ec962661f660875b0702aeb6"},"package":"f0e60b75072ecd4168020818c0107f2857bb6c4e64252d8d3983f6263b40a5c3"}

View File

@ -0,0 +1,48 @@
"""
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # "Apache-2.0"
])
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_library",
"rust_binary",
"rust_test",
)
# Unsupported target "abs_diff_eq" with type "test" omitted
rust_library(
name = "approx",
crate_root = "src/lib.rs",
crate_type = "lib",
edition = "2015",
srcs = glob(["**/*.rs"]),
deps = [
"//third_party/cargo/vendor/num-traits-0.2.11:num_traits",
],
rustc_flags = [
"--cap-lints=allow",
],
version = "0.3.2",
crate_features = [
],
)
# Unsupported target "macro_import" with type "test" omitted
# Unsupported target "macros" with type "test" omitted
# Unsupported target "relative_eq" with type "test" omitted
# Unsupported target "ulps_eq" with type "test" omitted

View File

@ -0,0 +1,39 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "approx"
version = "0.3.2"
authors = ["Brendan Zabarauskas <bjzaba@yahoo.com.au>"]
description = "Approximate floating point equality comparisons and assertions."
homepage = "https://github.com/brendanzab/approx"
documentation = "https://docs.rs/approx"
readme = "README.md"
keywords = ["approximate", "assert", "comparison", "equality", "float"]
license = "Apache-2.0"
repository = "https://github.com/brendanzab/approx"
[package.metadata.docs.rs]
features = ["std", "num-complex"]
[lib]
name = "approx"
[dependencies.num-complex]
version = "0.2.0"
optional = true
[dependencies.num-traits]
version = "0.2.0"
default_features = false
[features]
default = ["std"]
std = []

View File

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,20 @@
# approx
[![Build Status][travis_badge]][travis_url]
[![Version][version_badge]][crate_url]
[![Documentation][docs_badge]][docs_url]
[![Downloads][downloads_badge]][crate_url]
[![License][license_badge]][license_url]
[travis_badge]: https://travis-ci.org/brendanzab/approx.svg?branch=master
[docs_badge]: https://docs.rs/approx/badge.svg
[version_badge]: https://img.shields.io/crates/v/approx.svg
[license_badge]: https://img.shields.io/crates/l/approx.svg
[downloads_badge]: https://img.shields.io/crates/d/approx.svg
[travis_url]: https://travis-ci.org/brendanzab/approx
[docs_url]: https://docs.rs/approx
[crate_url]: https://crates.io/crates/approx
[license_url]: https://github.com/brendanzab/approx/blob/master/LICENSE
Approximate floating point equality comparisons and assertions for the Rust Programming Language.
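A minimal usage sketch (the values are illustrative; the macros and their option names are the ones defined in the vendored `src/macros.rs` and `src/lib.rs` below):

```rust
#[macro_use]
extern crate approx;

use std::f64;

fn main() {
    // 0.1 + 0.2 is not bit-for-bit equal to 0.3, but it is approximately equal.
    assert!(abs_diff_eq!(0.1f64 + 0.2, 0.3, epsilon = f64::EPSILON));
    assert!(relative_eq!(0.1f64 + 0.2, 0.3));
    assert!(ulps_eq!(0.1f64 + 0.2, 0.3, max_ulps = 4));

    // Values that are genuinely far apart are still reported as unequal.
    assert!(abs_diff_ne!(1.0f64, 2.0));
}
```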

View File

@ -0,0 +1,185 @@
#[cfg(feature = "num-complex")]
use num_complex::Complex;
#[cfg(not(feature = "std"))]
use num_traits::float::FloatCore;
use std::{cell, f32, f64};
/// Equality that is defined using the absolute difference of two numbers.
pub trait AbsDiffEq<Rhs = Self>: PartialEq<Rhs>
where
Rhs: ?Sized,
{
/// Used for specifying relative comparisons.
type Epsilon;
/// The default tolerance to use when testing values that are close together.
///
/// This is used when no `epsilon` value is supplied to the `abs_diff_eq!`, `relative_eq!`, or
/// `ulps_eq!` macros.
fn default_epsilon() -> Self::Epsilon;
/// A test for equality that uses the absolute difference to compute the approximate
/// equality of two numbers.
fn abs_diff_eq(&self, other: &Rhs, epsilon: Self::Epsilon) -> bool;
/// The inverse of `AbsDiffEq::abs_diff_eq`.
fn abs_diff_ne(&self, other: &Rhs, epsilon: Self::Epsilon) -> bool {
!Self::abs_diff_eq(self, other, epsilon)
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Base implementations
///////////////////////////////////////////////////////////////////////////////////////////////////
macro_rules! impl_unsigned_abs_diff_eq {
($T:ident, $default_epsilon:expr) => {
impl AbsDiffEq for $T {
type Epsilon = $T;
#[inline]
fn default_epsilon() -> $T {
$default_epsilon
}
#[inline]
fn abs_diff_eq(&self, other: &$T, epsilon: $T) -> bool {
(if self > other {
self - other
} else {
other - self
}) <= epsilon
}
}
};
}
impl_unsigned_abs_diff_eq!(u8, 0);
impl_unsigned_abs_diff_eq!(u16, 0);
impl_unsigned_abs_diff_eq!(u32, 0);
impl_unsigned_abs_diff_eq!(u64, 0);
impl_unsigned_abs_diff_eq!(usize, 0);
macro_rules! impl_signed_abs_diff_eq {
($T:ident, $default_epsilon:expr) => {
impl AbsDiffEq for $T {
type Epsilon = $T;
#[inline]
fn default_epsilon() -> $T {
$default_epsilon
}
#[inline]
fn abs_diff_eq(&self, other: &$T, epsilon: $T) -> bool {
$T::abs(self - other) <= epsilon
}
}
};
}
impl_signed_abs_diff_eq!(i8, 0);
impl_signed_abs_diff_eq!(i16, 0);
impl_signed_abs_diff_eq!(i32, 0);
impl_signed_abs_diff_eq!(i64, 0);
impl_signed_abs_diff_eq!(isize, 0);
impl_signed_abs_diff_eq!(f32, f32::EPSILON);
impl_signed_abs_diff_eq!(f64, f64::EPSILON);
///////////////////////////////////////////////////////////////////////////////////////////////////
// Derived implementations
///////////////////////////////////////////////////////////////////////////////////////////////////
impl<'a, T: AbsDiffEq + ?Sized> AbsDiffEq for &'a T {
type Epsilon = T::Epsilon;
#[inline]
fn default_epsilon() -> T::Epsilon {
T::default_epsilon()
}
#[inline]
fn abs_diff_eq(&self, other: &&'a T, epsilon: T::Epsilon) -> bool {
T::abs_diff_eq(*self, *other, epsilon)
}
}
impl<'a, T: AbsDiffEq + ?Sized> AbsDiffEq for &'a mut T {
type Epsilon = T::Epsilon;
#[inline]
fn default_epsilon() -> T::Epsilon {
T::default_epsilon()
}
#[inline]
fn abs_diff_eq(&self, other: &&'a mut T, epsilon: T::Epsilon) -> bool {
T::abs_diff_eq(*self, *other, epsilon)
}
}
impl<T: AbsDiffEq + Copy> AbsDiffEq for cell::Cell<T> {
type Epsilon = T::Epsilon;
#[inline]
fn default_epsilon() -> T::Epsilon {
T::default_epsilon()
}
#[inline]
fn abs_diff_eq(&self, other: &cell::Cell<T>, epsilon: T::Epsilon) -> bool {
T::abs_diff_eq(&self.get(), &other.get(), epsilon)
}
}
impl<T: AbsDiffEq + ?Sized> AbsDiffEq for cell::RefCell<T> {
type Epsilon = T::Epsilon;
#[inline]
fn default_epsilon() -> T::Epsilon {
T::default_epsilon()
}
#[inline]
fn abs_diff_eq(&self, other: &cell::RefCell<T>, epsilon: T::Epsilon) -> bool {
T::abs_diff_eq(&self.borrow(), &other.borrow(), epsilon)
}
}
impl<A, B> AbsDiffEq<[B]> for [A]
where
A: AbsDiffEq<B>,
A::Epsilon: Clone,
{
type Epsilon = A::Epsilon;
#[inline]
fn default_epsilon() -> A::Epsilon {
A::default_epsilon()
}
#[inline]
fn abs_diff_eq(&self, other: &[B], epsilon: A::Epsilon) -> bool {
self.len() == other.len()
&& Iterator::zip(self.iter(), other).all(|(x, y)| A::abs_diff_eq(x, y, epsilon.clone()))
}
}
#[cfg(feature = "num-complex")]
impl<T: AbsDiffEq> AbsDiffEq for Complex<T>
where
T::Epsilon: Clone,
{
type Epsilon = T::Epsilon;
#[inline]
fn default_epsilon() -> T::Epsilon {
T::default_epsilon()
}
#[inline]
fn abs_diff_eq(&self, other: &Complex<T>, epsilon: T::Epsilon) -> bool {
T::abs_diff_eq(&self.re, &other.re, epsilon.clone())
&& T::abs_diff_eq(&self.im, &other.im, epsilon.clone())
}
}

View File

@ -0,0 +1,384 @@
// Copyright 2015 Brendan Zabarauskas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! A crate that provides facilities for testing the approximate equality of floating-point
//! based types, using either relative difference, or units in the last place (ULPs)
//! comparisons.
//!
//! You can also use the `abs_diff_{eq, ne}!`, `relative_{eq, ne}!`, and `ulps_{eq, ne}!` macros to test for equality
//! using a more positional style.
//!
//! ```rust
//! #[macro_use]
//! extern crate approx;
//!
//! use std::f64;
//!
//! # fn main() {
//! abs_diff_eq!(1.0, 1.0);
//! abs_diff_eq!(1.0, 1.0, epsilon = f64::EPSILON);
//!
//! relative_eq!(1.0, 1.0);
//! relative_eq!(1.0, 1.0, epsilon = f64::EPSILON);
//! relative_eq!(1.0, 1.0, max_relative = 1.0);
//! relative_eq!(1.0, 1.0, epsilon = f64::EPSILON, max_relative = 1.0);
//! relative_eq!(1.0, 1.0, max_relative = 1.0, epsilon = f64::EPSILON);
//!
//! ulps_eq!(1.0, 1.0);
//! ulps_eq!(1.0, 1.0, epsilon = f64::EPSILON);
//! ulps_eq!(1.0, 1.0, max_ulps = 4);
//! ulps_eq!(1.0, 1.0, epsilon = f64::EPSILON, max_ulps = 4);
//! ulps_eq!(1.0, 1.0, max_ulps = 4, epsilon = f64::EPSILON);
//! # }
//! ```
//!
//! # Implementing approximate equality for custom types
//!
//! The `AbsDiffEq`, `RelativeEq`, and `UlpsEq` traits allow approximate equalities to be implemented on types, based on the
//! fundamental floating point implementations.
//!
//! For example, we might want to be able to do approximate assertions on a complex number type:
//!
//! ```rust
//! #[macro_use]
//! extern crate approx;
//! # use approx::{AbsDiffEq, RelativeEq, UlpsEq};
//!
//! #[derive(Debug, PartialEq)]
//! struct Complex<T> {
//! x: T,
//! i: T,
//! }
//! # impl<T: AbsDiffEq> AbsDiffEq for Complex<T> where T::Epsilon: Copy {
//! # type Epsilon = T::Epsilon;
//! # fn default_epsilon() -> T::Epsilon { T::default_epsilon() }
//! # fn abs_diff_eq(&self, other: &Self, epsilon: T::Epsilon) -> bool {
//! # T::abs_diff_eq(&self.x, &other.x, epsilon) &&
//! # T::abs_diff_eq(&self.i, &other.i, epsilon)
//! # }
//! # }
//! # impl<T: RelativeEq> RelativeEq for Complex<T> where T::Epsilon: Copy {
//! # fn default_max_relative() -> T::Epsilon { T::default_max_relative() }
//! # fn relative_eq(&self, other: &Self, epsilon: T::Epsilon, max_relative: T::Epsilon)
//! # -> bool {
//! # T::relative_eq(&self.x, &other.x, epsilon, max_relative) &&
//! # T::relative_eq(&self.i, &other.i, epsilon, max_relative)
//! # }
//! # }
//! # impl<T: UlpsEq> UlpsEq for Complex<T> where T::Epsilon: Copy {
//! # fn default_max_ulps() -> u32 { T::default_max_ulps() }
//! # fn ulps_eq(&self, other: &Self, epsilon: T::Epsilon, max_ulps: u32) -> bool {
//! # T::ulps_eq(&self.x, &other.x, epsilon, max_ulps) &&
//! # T::ulps_eq(&self.i, &other.i, epsilon, max_ulps)
//! # }
//! # }
//!
//! # fn main() {
//! let x = Complex { x: 1.2, i: 2.3 };
//!
//! assert_relative_eq!(x, x);
//! assert_ulps_eq!(x, x, max_ulps = 4);
//! # }
//! ```
//!
//! To do this we can implement `AbsDiffEq`, `RelativeEq` and `UlpsEq` generically in terms of a
//! type parameter that also implements `AbsDiffEq`, `RelativeEq` and `UlpsEq` respectively. This
//! means that we can make comparisons for either `Complex<f32>` or `Complex<f64>`:
//!
//! ```rust
//! # use approx::{AbsDiffEq, RelativeEq, UlpsEq};
//! # #[derive(Debug, PartialEq)]
//! # struct Complex<T> { x: T, i: T, }
//! #
//! impl<T: AbsDiffEq> AbsDiffEq for Complex<T> where
//! T::Epsilon: Copy,
//! {
//! type Epsilon = T::Epsilon;
//!
//! fn default_epsilon() -> T::Epsilon {
//! T::default_epsilon()
//! }
//!
//! fn abs_diff_eq(&self, other: &Self, epsilon: T::Epsilon) -> bool {
//! T::abs_diff_eq(&self.x, &other.x, epsilon) &&
//! T::abs_diff_eq(&self.i, &other.i, epsilon)
//! }
//! }
//!
//! impl<T: RelativeEq> RelativeEq for Complex<T> where
//! T::Epsilon: Copy,
//! {
//! fn default_max_relative() -> T::Epsilon {
//! T::default_max_relative()
//! }
//!
//! fn relative_eq(&self, other: &Self, epsilon: T::Epsilon, max_relative: T::Epsilon) -> bool {
//! T::relative_eq(&self.x, &other.x, epsilon, max_relative) &&
//! T::relative_eq(&self.i, &other.i, epsilon, max_relative)
//! }
//! }
//!
//! impl<T: UlpsEq> UlpsEq for Complex<T> where
//! T::Epsilon: Copy,
//! {
//! fn default_max_ulps() -> u32 {
//! T::default_max_ulps()
//! }
//!
//! fn ulps_eq(&self, other: &Self, epsilon: T::Epsilon, max_ulps: u32) -> bool {
//! T::ulps_eq(&self.x, &other.x, epsilon, max_ulps) &&
//! T::ulps_eq(&self.i, &other.i, epsilon, max_ulps)
//! }
//! }
//! ```
//!
//! # References
//!
//! Floating point is hard! Thanks goes to these links for helping to make things a _little_
//! easier to understand:
//!
//! - [Comparing Floating Point Numbers, 2012 Edition]
//! (https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/)
//! - [The Floating Point Guide - Comparison](http://floating-point-gui.de/errors/comparison/)
//! - [What Every Computer Scientist Should Know About Floating-Point Arithmetic]
//! (https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html)
#![cfg_attr(not(feature = "std"), no_std)]
#[cfg(feature = "num-complex")]
extern crate num_complex;
extern crate num_traits;
#[cfg(not(feature = "std"))]
use core as std;
mod abs_diff_eq;
mod relative_eq;
mod ulps_eq;
mod macros;
pub use abs_diff_eq::AbsDiffEq;
pub use relative_eq::RelativeEq;
pub use ulps_eq::UlpsEq;
/// The requisite parameters for testing for approximate equality using an
/// absolute difference based comparison.
///
/// This is not normally used directly, rather via the
/// `assert_abs_diff_{eq|ne}!` and `abs_diff_{eq|ne}!` macros.
///
/// # Example
///
/// ```rust
/// use std::f64;
/// use approx::AbsDiff;
///
/// AbsDiff::default().eq(&1.0, &1.0);
/// AbsDiff::default().epsilon(f64::EPSILON).eq(&1.0, &1.0);
/// ```
pub struct AbsDiff<A, B = A>
where
A: AbsDiffEq<B> + ?Sized,
B: ?Sized,
{
/// The tolerance to use when testing values that are close together.
pub epsilon: A::Epsilon,
}
impl<A, B> Default for AbsDiff<A, B>
where
A: AbsDiffEq<B> + ?Sized,
B: ?Sized,
{
#[inline]
fn default() -> AbsDiff<A, B> {
AbsDiff {
epsilon: A::default_epsilon(),
}
}
}
impl<A, B> AbsDiff<A, B>
where
A: AbsDiffEq<B> + ?Sized,
B: ?Sized,
{
/// Replace the epsilon value with the one specified.
#[inline]
pub fn epsilon(self, epsilon: A::Epsilon) -> AbsDiff<A, B> {
AbsDiff { epsilon, ..self }
}
/// Perform the equality comparison
#[inline]
pub fn eq(self, lhs: &A, rhs: &B) -> bool {
A::abs_diff_eq(lhs, rhs, self.epsilon)
}
/// Perform the inequality comparison
#[inline]
pub fn ne(self, lhs: &A, rhs: &B) -> bool {
A::abs_diff_ne(lhs, rhs, self.epsilon)
}
}
/// The requisite parameters for testing for approximate equality using a
/// relative based comparison.
///
/// This is not normally used directly, rather via the
/// `assert_relative_{eq|ne}!` and `relative_{eq|ne}!` macros.
///
/// # Example
///
/// ```rust
/// use std::f64;
/// use approx::Relative;
///
/// Relative::default().eq(&1.0, &1.0);
/// Relative::default().epsilon(f64::EPSILON).eq(&1.0, &1.0);
/// Relative::default().max_relative(1.0).eq(&1.0, &1.0);
/// Relative::default().epsilon(f64::EPSILON).max_relative(1.0).eq(&1.0, &1.0);
/// Relative::default().max_relative(1.0).epsilon(f64::EPSILON).eq(&1.0, &1.0);
/// ```
pub struct Relative<A, B = A>
where
A: RelativeEq<B> + ?Sized,
B: ?Sized,
{
/// The tolerance to use when testing values that are close together.
pub epsilon: A::Epsilon,
/// The relative tolerance for testing values that are far-apart.
pub max_relative: A::Epsilon,
}
impl<A, B> Default for Relative<A, B>
where
A: RelativeEq<B> + ?Sized,
B: ?Sized,
{
#[inline]
fn default() -> Relative<A, B> {
Relative {
epsilon: A::default_epsilon(),
max_relative: A::default_max_relative(),
}
}
}
impl<A, B> Relative<A, B>
where
A: RelativeEq<B> + ?Sized,
B: ?Sized,
{
/// Replace the epsilon value with the one specified.
#[inline]
pub fn epsilon(self, epsilon: A::Epsilon) -> Relative<A, B> {
Relative { epsilon, ..self }
}
/// Replace the maximum relative value with the one specified.
#[inline]
pub fn max_relative(self, max_relative: A::Epsilon) -> Relative<A, B> {
Relative {
max_relative,
..self
}
}
/// Perform the equality comparison
#[inline]
pub fn eq(self, lhs: &A, rhs: &B) -> bool {
A::relative_eq(lhs, rhs, self.epsilon, self.max_relative)
}
/// Perform the inequality comparison
#[inline]
pub fn ne(self, lhs: &A, rhs: &B) -> bool {
A::relative_ne(lhs, rhs, self.epsilon, self.max_relative)
}
}
/// The requisite parameters for testing for approximate equality using an ULPs
/// based comparison.
///
/// This is not normally used directly, rather via the `assert_ulps_{eq|ne}!`
/// and `ulps_{eq|ne}!` macros.
///
/// # Example
///
/// ```rust
/// use std::f64;
/// use approx::Ulps;
///
/// Ulps::default().eq(&1.0, &1.0);
/// Ulps::default().epsilon(f64::EPSILON).eq(&1.0, &1.0);
/// Ulps::default().max_ulps(4).eq(&1.0, &1.0);
/// Ulps::default().epsilon(f64::EPSILON).max_ulps(4).eq(&1.0, &1.0);
/// Ulps::default().max_ulps(4).epsilon(f64::EPSILON).eq(&1.0, &1.0);
/// ```
pub struct Ulps<A, B = A>
where
A: UlpsEq<B> + ?Sized,
B: ?Sized,
{
/// The tolerance to use when testing values that are close together.
pub epsilon: A::Epsilon,
    /// The ULPs to tolerate when testing values that are far apart.
pub max_ulps: u32,
}
impl<A, B> Default for Ulps<A, B>
where
A: UlpsEq<B> + ?Sized,
B: ?Sized,
{
#[inline]
fn default() -> Ulps<A, B> {
Ulps {
epsilon: A::default_epsilon(),
max_ulps: A::default_max_ulps(),
}
}
}
impl<A, B> Ulps<A, B>
where
A: UlpsEq<B> + ?Sized,
B: ?Sized,
{
/// Replace the epsilon value with the one specified.
#[inline]
pub fn epsilon(self, epsilon: A::Epsilon) -> Ulps<A, B> {
Ulps { epsilon, ..self }
}
/// Replace the max ulps value with the one specified.
#[inline]
pub fn max_ulps(self, max_ulps: u32) -> Ulps<A, B> {
Ulps { max_ulps, ..self }
}
    /// Perform the equality comparison.
#[inline]
pub fn eq(self, lhs: &A, rhs: &B) -> bool {
A::ulps_eq(lhs, rhs, self.epsilon, self.max_ulps)
}
    /// Perform the inequality comparison.
#[inline]
pub fn ne(self, lhs: &A, rhs: &B) -> bool {
A::ulps_ne(lhs, rhs, self.epsilon, self.max_ulps)
}
}

View File

@ -0,0 +1,187 @@
// Copyright 2015 Brendan Zabarauskas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/// Approximate equality using the absolute difference.
#[macro_export]
macro_rules! abs_diff_eq {
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*) => {
$crate::AbsDiff::default()$(.$opt($val))*.eq(&$lhs, &$rhs)
};
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*,) => {
$crate::AbsDiff::default()$(.$opt($val))*.eq(&$lhs, &$rhs)
};
}
/// Approximate inequality using the absolute difference.
#[macro_export]
macro_rules! abs_diff_ne {
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*) => {
$crate::AbsDiff::default()$(.$opt($val))*.ne(&$lhs, &$rhs)
};
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*,) => {
$crate::AbsDiff::default()$(.$opt($val))*.ne(&$lhs, &$rhs)
};
}
/// Approximate equality using both the absolute difference and relative-based comparisons.
#[macro_export]
macro_rules! relative_eq {
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*) => {
$crate::Relative::default()$(.$opt($val))*.eq(&$lhs, &$rhs)
};
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*,) => {
$crate::Relative::default()$(.$opt($val))*.eq(&$lhs, &$rhs)
};
}
/// Approximate inequality using both the absolute difference and relative-based comparisons.
#[macro_export]
macro_rules! relative_ne {
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*) => {
$crate::Relative::default()$(.$opt($val))*.ne(&$lhs, &$rhs)
};
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*,) => {
$crate::Relative::default()$(.$opt($val))*.ne(&$lhs, &$rhs)
};
}
/// Approximate equality using both the absolute difference and ULPs (Units in Last Place).
#[macro_export]
macro_rules! ulps_eq {
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*) => {
$crate::Ulps::default()$(.$opt($val))*.eq(&$lhs, &$rhs)
};
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*,) => {
$crate::Ulps::default()$(.$opt($val))*.eq(&$lhs, &$rhs)
};
}
/// Approximate inequality using both the absolute difference and ULPs (Units in Last Place).
#[macro_export]
macro_rules! ulps_ne {
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*) => {
$crate::Ulps::default()$(.$opt($val))*.ne(&$lhs, &$rhs)
};
($lhs:expr, $rhs:expr $(, $opt:ident = $val:expr)*,) => {
$crate::Ulps::default()$(.$opt($val))*.ne(&$lhs, &$rhs)
};
}
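// Illustrative note, not from upstream approx: each macro above simply builds
// the corresponding comparison type and calls `.eq`/`.ne` on it, so an
// invocation such as
//
//     relative_eq!(a, b, max_relative = 1e-6)
//
// expands to roughly
//
//     $crate::Relative::default().max_relative(1e-6).eq(&a, &b)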
#[doc(hidden)]
#[macro_export]
macro_rules! __assert_approx {
($eq:ident, $given:expr, $expected:expr) => {{
let (given, expected) = (&($given), &($expected));
if !$eq!(*given, *expected) {
panic!(
"assert_{}!({}, {})
left = {:?}
right = {:?}
",
stringify!($eq),
stringify!($given),
stringify!($expected),
given, expected,
);
}
}};
($eq:ident, $given:expr, $expected:expr, $($opt:ident = $val:expr),+) => {{
let (given, expected) = (&($given), &($expected));
if !$eq!(*given, *expected, $($opt = $val),+) {
panic!(
"assert_{}!({}, {}, {})
left = {:?}
right = {:?}
",
stringify!($eq),
stringify!($given),
stringify!($expected),
stringify!($($opt = $val),+),
given, expected,
);
}
}};
}
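// Illustrative note, not from upstream approx: when the wrapped comparison
// fails, the helper above panics with a message along the lines of
//
//     assert_abs_diff_eq!(a, b)
//     left = 1.0
//     right = 2.0
//
// echoing the invocation via `stringify!` and the operands via `Debug`.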
/// An assertion that delegates to `abs_diff_eq!`, and panics with a helpful error on failure.
#[macro_export(local_inner_macros)]
macro_rules! assert_abs_diff_eq {
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
__assert_approx!(abs_diff_eq, $given, $expected $(, $opt = $val)*)
};
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
__assert_approx!(abs_diff_eq, $given, $expected $(, $opt = $val)*)
};
}
/// An assertion that delegates to `abs_diff_ne!`, and panics with a helpful error on failure.
#[macro_export(local_inner_macros)]
macro_rules! assert_abs_diff_ne {
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
__assert_approx!(abs_diff_ne, $given, $expected $(, $opt = $val)*)
};
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
__assert_approx!(abs_diff_ne, $given, $expected $(, $opt = $val)*)
};
}
/// An assertion that delegates to `relative_eq!`, and panics with a helpful error on failure.
#[macro_export(local_inner_macros)]
macro_rules! assert_relative_eq {
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
__assert_approx!(relative_eq, $given, $expected $(, $opt = $val)*)
};
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
__assert_approx!(relative_eq, $given, $expected $(, $opt = $val)*)
};
}
/// An assertion that delegates to `relative_ne!`, and panics with a helpful error on failure.
#[macro_export(local_inner_macros)]
macro_rules! assert_relative_ne {
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
__assert_approx!(relative_ne, $given, $expected $(, $opt = $val)*)
};
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
__assert_approx!(relative_ne, $given, $expected $(, $opt = $val)*)
};
}
/// An assertion that delegates to `ulps_eq!`, and panics with a helpful error on failure.
#[macro_export(local_inner_macros)]
macro_rules! assert_ulps_eq {
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
__assert_approx!(ulps_eq, $given, $expected $(, $opt = $val)*)
};
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
__assert_approx!(ulps_eq, $given, $expected $(, $opt = $val)*)
};
}
/// An assertion that delegates to `ulps_ne!`, and panics with a helpful error on failure.
#[macro_export(local_inner_macros)]
macro_rules! assert_ulps_ne {
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*) => {
__assert_approx!(ulps_ne, $given, $expected $(, $opt = $val)*)
};
($given:expr, $expected:expr $(, $opt:ident = $val:expr)*,) => {
__assert_approx!(ulps_ne, $given, $expected $(, $opt = $val)*)
};
}

View File

@ -0,0 +1,196 @@
#[cfg(feature = "num-complex")]
use num_complex::Complex;
#[cfg(not(feature = "std"))]
use num_traits::float::FloatCore;
use std::{cell, f32, f64};
use AbsDiffEq;
/// Equality comparisons between two numbers using both the absolute difference and
/// relative-based comparisons.
pub trait RelativeEq<Rhs = Self>: AbsDiffEq<Rhs>
where
Rhs: ?Sized,
{
    /// The default relative tolerance for testing values that are far apart.
///
/// This is used when no `max_relative` value is supplied to the `relative_eq` macro.
fn default_max_relative() -> Self::Epsilon;
/// A test for equality that uses a relative comparison if the values are far apart.
fn relative_eq(
&self,
other: &Rhs,
epsilon: Self::Epsilon,
max_relative: Self::Epsilon,
) -> bool;
    /// The inverse of `RelativeEq::relative_eq`.
fn relative_ne(
&self,
other: &Rhs,
epsilon: Self::Epsilon,
max_relative: Self::Epsilon,
) -> bool {
!Self::relative_eq(self, other, epsilon, max_relative)
}
}
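// Illustrative sketch, not part of upstream approx: a hypothetical newtype can
// delegate `RelativeEq` (and the prerequisite `AbsDiffEq`) to its inner float.
// The `Meters` name and the `PartialEq` derive are assumptions for the example.
//
//     #[derive(PartialEq)]
//     struct Meters(f64);
//
//     impl AbsDiffEq for Meters {
//         type Epsilon = f64;
//         fn default_epsilon() -> f64 {
//             f64::default_epsilon()
//         }
//         fn abs_diff_eq(&self, other: &Self, epsilon: f64) -> bool {
//             f64::abs_diff_eq(&self.0, &other.0, epsilon)
//         }
//     }
//
//     impl RelativeEq for Meters {
//         fn default_max_relative() -> f64 {
//             f64::default_max_relative()
//         }
//         fn relative_eq(&self, other: &Self, epsilon: f64, max_relative: f64) -> bool {
//             f64::relative_eq(&self.0, &other.0, epsilon, max_relative)
//         }
//     }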
///////////////////////////////////////////////////////////////////////////////////////////////////
// Base implementations
///////////////////////////////////////////////////////////////////////////////////////////////////
// Implementation based on: [Comparing Floating Point Numbers, 2012 Edition]
// (https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/)
macro_rules! impl_relative_eq {
($T:ident, $U:ident) => {
impl RelativeEq for $T {
#[inline]
fn default_max_relative() -> $T {
$T::EPSILON
}
#[inline]
fn relative_eq(&self, other: &$T, epsilon: $T, max_relative: $T) -> bool {
// Handle same infinities
if self == other {
return true;
}
// Handle remaining infinities
if $T::is_infinite(*self) || $T::is_infinite(*other) {
return false;
}
let abs_diff = $T::abs(self - other);
// For when the numbers are really close together
if abs_diff <= epsilon {
return true;
}
let abs_self = $T::abs(*self);
let abs_other = $T::abs(*other);
let largest = if abs_other > abs_self {
abs_other
} else {
abs_self
};
// Use a relative difference comparison
abs_diff <= largest * max_relative
}
}
};
}
impl_relative_eq!(f32, i32);
impl_relative_eq!(f64, i64);
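// Illustrative note, not from upstream approx: the comparison scales with the
// larger operand, so with `max_relative = 1e-6` one would expect, for example,
//
//     assert!(relative_eq!(100_000.0f64, 100_000.05f64, max_relative = 1e-6));
//     assert!(relative_ne!(100_000.0f64, 100_000.2f64, max_relative = 1e-6));
//
// since 0.05 <= 100_000.05 * 1e-6 (about 0.1), while 0.2 is not.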
///////////////////////////////////////////////////////////////////////////////////////////////////
// Derived implementations
///////////////////////////////////////////////////////////////////////////////////////////////////
impl<'a, T: RelativeEq + ?Sized> RelativeEq for &'a T {
#[inline]
fn default_max_relative() -> T::Epsilon {
T::default_max_relative()
}
#[inline]
fn relative_eq(&self, other: &&'a T, epsilon: T::Epsilon, max_relative: T::Epsilon) -> bool {
T::relative_eq(*self, *other, epsilon, max_relative)
}
}
impl<'a, T: RelativeEq + ?Sized> RelativeEq for &'a mut T {
#[inline]
fn default_max_relative() -> T::Epsilon {
T::default_max_relative()
}
#[inline]
fn relative_eq(
&self,
other: &&'a mut T,
epsilon: T::Epsilon,
max_relative: T::Epsilon,
) -> bool {
T::relative_eq(*self, *other, epsilon, max_relative)
}
}
impl<T: RelativeEq + Copy> RelativeEq for cell::Cell<T> {
#[inline]
fn default_max_relative() -> T::Epsilon {
T::default_max_relative()
}
#[inline]
fn relative_eq(
&self,
other: &cell::Cell<T>,
epsilon: T::Epsilon,
max_relative: T::Epsilon,
) -> bool {
T::relative_eq(&self.get(), &other.get(), epsilon, max_relative)
}
}
impl<T: RelativeEq + ?Sized> RelativeEq for cell::RefCell<T> {
#[inline]
fn default_max_relative() -> T::Epsilon {
T::default_max_relative()
}
#[inline]
fn relative_eq(
&self,
other: &cell::RefCell<T>,
epsilon: T::Epsilon,
max_relative: T::Epsilon,
) -> bool {
T::relative_eq(&self.borrow(), &other.borrow(), epsilon, max_relative)
}
}
impl<A, B> RelativeEq<[B]> for [A]
where
A: RelativeEq<B>,
A::Epsilon: Clone,
{
#[inline]
fn default_max_relative() -> A::Epsilon {
A::default_max_relative()
}
#[inline]
fn relative_eq(&self, other: &[B], epsilon: A::Epsilon, max_relative: A::Epsilon) -> bool {
self.len() == other.len()
&& Iterator::zip(self.iter(), other)
.all(|(x, y)| A::relative_eq(x, y, epsilon.clone(), max_relative.clone()))
}
}
#[cfg(feature = "num-complex")]
impl<T: RelativeEq> RelativeEq for Complex<T>
where
T::Epsilon: Clone,
{
#[inline]
fn default_max_relative() -> T::Epsilon {
T::default_max_relative()
}
#[inline]
fn relative_eq(
&self,
other: &Complex<T>,
epsilon: T::Epsilon,
max_relative: T::Epsilon,
) -> bool {
T::relative_eq(&self.re, &other.re, epsilon.clone(), max_relative.clone())
&& T::relative_eq(&self.im, &other.im, epsilon.clone(), max_relative.clone())
}
}

View File

@ -0,0 +1,153 @@
#[cfg(feature = "num-complex")]
use num_complex::Complex;
#[cfg(not(feature = "std"))]
use num_traits::float::FloatCore;
use std::{cell, mem};
use AbsDiffEq;
/// Equality comparisons between two numbers using both the absolute difference and ULPs
/// (Units in Last Place) based comparisons.
pub trait UlpsEq<Rhs = Self>: AbsDiffEq<Rhs>
where
Rhs: ?Sized,
{
    /// The default ULPs to tolerate when testing values that are far apart.
///
/// This is used when no `max_ulps` value is supplied to the `ulps_eq` macro.
fn default_max_ulps() -> u32;
/// A test for equality that uses units in the last place (ULP) if the values are far apart.
fn ulps_eq(&self, other: &Rhs, epsilon: Self::Epsilon, max_ulps: u32) -> bool;
    /// The inverse of `UlpsEq::ulps_eq`.
fn ulps_ne(&self, other: &Rhs, epsilon: Self::Epsilon, max_ulps: u32) -> bool {
!Self::ulps_eq(self, other, epsilon, max_ulps)
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Base implementations
///////////////////////////////////////////////////////////////////////////////////////////////////
// Implementation based on: [Comparing Floating Point Numbers, 2012 Edition]
// (https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/)
macro_rules! impl_ulps_eq {
($T:ident, $U:ident) => {
impl UlpsEq for $T {
#[inline]
fn default_max_ulps() -> u32 {
4
}
#[inline]
fn ulps_eq(&self, other: &$T, epsilon: $T, max_ulps: u32) -> bool {
// For when the numbers are really close together
if $T::abs_diff_eq(self, other, epsilon) {
return true;
}
// Trivial negative sign check
if self.signum() != other.signum() {
return false;
}
                // ULPs difference comparison
let int_self: $U = unsafe { mem::transmute(*self) };
let int_other: $U = unsafe { mem::transmute(*other) };
$U::abs(int_self - int_other) <= max_ulps as $U
}
}
};
}
impl_ulps_eq!(f32, i32);
impl_ulps_eq!(f64, i64);
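// Illustrative note, not from upstream approx: with the default of 4 ULPs, two
// floats compare equal when their bit patterns, reinterpreted as signed
// integers of the same width, differ by at most 4 steps. For example,
//
//     let a = 1.0f32;
//     let b = f32::from_bits(a.to_bits() + 3); // 3 ULPs above 1.0
//     let c = f32::from_bits(a.to_bits() + 8); // 8 ULPs above 1.0
//     assert!(ulps_eq!(a, b)); // within the default max_ulps of 4
//     assert!(ulps_ne!(a, c));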
///////////////////////////////////////////////////////////////////////////////////////////////////
// Derived implementations
///////////////////////////////////////////////////////////////////////////////////////////////////
impl<'a, T: UlpsEq + ?Sized> UlpsEq for &'a T {
#[inline]
fn default_max_ulps() -> u32 {
T::default_max_ulps()
}
#[inline]
fn ulps_eq(&self, other: &&'a T, epsilon: T::Epsilon, max_ulps: u32) -> bool {
T::ulps_eq(*self, *other, epsilon, max_ulps)
}
}
impl<'a, T: UlpsEq + ?Sized> UlpsEq for &'a mut T {
#[inline]
fn default_max_ulps() -> u32 {
T::default_max_ulps()
}
#[inline]
fn ulps_eq(&self, other: &&'a mut T, epsilon: T::Epsilon, max_ulps: u32) -> bool {
T::ulps_eq(*self, *other, epsilon, max_ulps)
}
}
impl<T: UlpsEq + Copy> UlpsEq for cell::Cell<T> {
#[inline]
fn default_max_ulps() -> u32 {
T::default_max_ulps()
}
#[inline]
fn ulps_eq(&self, other: &cell::Cell<T>, epsilon: T::Epsilon, max_ulps: u32) -> bool {
T::ulps_eq(&self.get(), &other.get(), epsilon, max_ulps)
}
}
impl<T: UlpsEq + ?Sized> UlpsEq for cell::RefCell<T> {
#[inline]
fn default_max_ulps() -> u32 {
T::default_max_ulps()
}
#[inline]
fn ulps_eq(&self, other: &cell::RefCell<T>, epsilon: T::Epsilon, max_ulps: u32) -> bool {
T::ulps_eq(&self.borrow(), &other.borrow(), epsilon, max_ulps)
}
}
impl<A, B> UlpsEq<[B]> for [A]
where
A: UlpsEq<B>,
A::Epsilon: Clone,
{
#[inline]
fn default_max_ulps() -> u32 {
A::default_max_ulps()
}
#[inline]
fn ulps_eq(&self, other: &[B], epsilon: A::Epsilon, max_ulps: u32) -> bool {
self.len() == other.len()
&& Iterator::zip(self.iter(), other)
.all(|(x, y)| A::ulps_eq(x, y, epsilon.clone(), max_ulps.clone()))
}
}
#[cfg(feature = "num-complex")]
impl<T: UlpsEq> UlpsEq for Complex<T>
where
T::Epsilon: Clone,
{
#[inline]
fn default_max_ulps() -> u32 {
T::default_max_ulps()
}
#[inline]
fn ulps_eq(&self, other: &Complex<T>, epsilon: T::Epsilon, max_ulps: u32) -> bool {
T::ulps_eq(&self.re, &other.re, epsilon.clone(), max_ulps)
&& T::ulps_eq(&self.im, &other.im, epsilon.clone(), max_ulps)
}
}

View File

@ -0,0 +1,442 @@
// Copyright 2015 Brendan Zabarauskas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Test cases derived from:
// https://github.com/Pybonacci/puntoflotante.org/blob/master/content/errors/NearlyEqualsTest.java
#[macro_use]
extern crate approx;
mod test_f32 {
use std::f32;
#[test]
fn test_basic() {
assert_abs_diff_eq!(1.0f32, 1.0f32);
assert_abs_diff_ne!(1.0f32, 2.0f32);
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_abs_diff_eq!(1.0f32, 2.0f32);
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_abs_diff_ne!(1.0f32, 1.0f32);
}
#[test]
fn test_big() {
assert_abs_diff_eq!(100000000.0f32, 100000001.0f32);
assert_abs_diff_eq!(100000001.0f32, 100000000.0f32);
assert_abs_diff_ne!(10000.0f32, 10001.0f32);
assert_abs_diff_ne!(10001.0f32, 10000.0f32);
}
#[test]
fn test_big_neg() {
assert_abs_diff_eq!(-100000000.0f32, -100000001.0f32);
assert_abs_diff_eq!(-100000001.0f32, -100000000.0f32);
assert_abs_diff_ne!(-10000.0f32, -10001.0f32);
assert_abs_diff_ne!(-10001.0f32, -10000.0f32);
}
#[test]
fn test_mid() {
assert_abs_diff_eq!(1.0000001f32, 1.0000002f32);
assert_abs_diff_eq!(1.0000002f32, 1.0000001f32);
assert_abs_diff_ne!(1.000001f32, 1.000002f32);
assert_abs_diff_ne!(1.000002f32, 1.000001f32);
}
#[test]
fn test_mid_neg() {
assert_abs_diff_eq!(-1.0000001f32, -1.0000002f32);
assert_abs_diff_eq!(-1.0000002f32, -1.0000001f32);
assert_abs_diff_ne!(-1.000001f32, -1.000002f32);
assert_abs_diff_ne!(-1.000002f32, -1.000001f32);
}
#[test]
fn test_small() {
assert_abs_diff_eq!(0.000010001f32, 0.000010002f32);
assert_abs_diff_eq!(0.000010002f32, 0.000010001f32);
assert_abs_diff_ne!(0.000001002f32, 0.0000001001f32);
assert_abs_diff_ne!(0.000001001f32, 0.0000001002f32);
}
#[test]
fn test_small_neg() {
assert_abs_diff_eq!(-0.000010001f32, -0.000010002f32);
assert_abs_diff_eq!(-0.000010002f32, -0.000010001f32);
assert_abs_diff_ne!(-0.000001002f32, -0.0000001001f32);
assert_abs_diff_ne!(-0.000001001f32, -0.0000001002f32);
}
#[test]
fn test_zero() {
assert_abs_diff_eq!(0.0f32, 0.0f32);
assert_abs_diff_eq!(0.0f32, -0.0f32);
assert_abs_diff_eq!(-0.0f32, -0.0f32);
assert_abs_diff_ne!(0.000001f32, 0.0f32);
assert_abs_diff_ne!(0.0f32, 0.000001f32);
assert_abs_diff_ne!(-0.000001f32, 0.0f32);
assert_abs_diff_ne!(0.0f32, -0.000001f32);
}
#[test]
fn test_epsilon() {
assert_abs_diff_eq!(0.0f32, 1e-40f32, epsilon = 1e-40f32);
assert_abs_diff_eq!(1e-40f32, 0.0f32, epsilon = 1e-40f32);
assert_abs_diff_eq!(0.0f32, -1e-40f32, epsilon = 1e-40f32);
assert_abs_diff_eq!(-1e-40f32, 0.0f32, epsilon = 1e-40f32);
assert_abs_diff_ne!(1e-40f32, 0.0f32, epsilon = 1e-41f32);
assert_abs_diff_ne!(0.0f32, 1e-40f32, epsilon = 1e-41f32);
assert_abs_diff_ne!(-1e-40f32, 0.0f32, epsilon = 1e-41f32);
assert_abs_diff_ne!(0.0f32, -1e-40f32, epsilon = 1e-41f32);
}
#[test]
fn test_max() {
assert_abs_diff_eq!(f32::MAX, f32::MAX);
assert_abs_diff_ne!(f32::MAX, -f32::MAX);
assert_abs_diff_ne!(-f32::MAX, f32::MAX);
assert_abs_diff_ne!(f32::MAX, f32::MAX / 2.0);
assert_abs_diff_ne!(f32::MAX, -f32::MAX / 2.0);
assert_abs_diff_ne!(-f32::MAX, f32::MAX / 2.0);
}
// NOTE: abs_diff_eq fails as numbers begin to get very large
// #[test]
// fn test_infinity() {
// assert_abs_diff_eq!(f32::INFINITY, f32::INFINITY);
// assert_abs_diff_eq!(f32::NEG_INFINITY, f32::NEG_INFINITY);
// assert_abs_diff_ne!(f32::NEG_INFINITY, f32::INFINITY);
// assert_abs_diff_eq!(f32::INFINITY, f32::MAX);
// assert_abs_diff_eq!(f32::NEG_INFINITY, -f32::MAX);
// }
#[test]
fn test_nan() {
assert_abs_diff_ne!(f32::NAN, f32::NAN);
assert_abs_diff_ne!(f32::NAN, 0.0);
assert_abs_diff_ne!(-0.0, f32::NAN);
assert_abs_diff_ne!(f32::NAN, -0.0);
assert_abs_diff_ne!(0.0, f32::NAN);
assert_abs_diff_ne!(f32::NAN, f32::INFINITY);
assert_abs_diff_ne!(f32::INFINITY, f32::NAN);
assert_abs_diff_ne!(f32::NAN, f32::NEG_INFINITY);
assert_abs_diff_ne!(f32::NEG_INFINITY, f32::NAN);
assert_abs_diff_ne!(f32::NAN, f32::MAX);
assert_abs_diff_ne!(f32::MAX, f32::NAN);
assert_abs_diff_ne!(f32::NAN, -f32::MAX);
assert_abs_diff_ne!(-f32::MAX, f32::NAN);
assert_abs_diff_ne!(f32::NAN, f32::MIN_POSITIVE);
assert_abs_diff_ne!(f32::MIN_POSITIVE, f32::NAN);
assert_abs_diff_ne!(f32::NAN, -f32::MIN_POSITIVE);
assert_abs_diff_ne!(-f32::MIN_POSITIVE, f32::NAN);
}
#[test]
fn test_opposite_signs() {
assert_abs_diff_ne!(1.000000001f32, -1.0f32);
assert_abs_diff_ne!(-1.0f32, 1.000000001f32);
assert_abs_diff_ne!(-1.000000001f32, 1.0f32);
assert_abs_diff_ne!(1.0f32, -1.000000001f32);
assert_abs_diff_eq!(10.0 * f32::MIN_POSITIVE, 10.0 * -f32::MIN_POSITIVE);
}
#[test]
fn test_close_to_zero() {
assert_abs_diff_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE);
assert_abs_diff_eq!(f32::MIN_POSITIVE, -f32::MIN_POSITIVE);
assert_abs_diff_eq!(-f32::MIN_POSITIVE, f32::MIN_POSITIVE);
assert_abs_diff_eq!(f32::MIN_POSITIVE, 0.0f32);
assert_abs_diff_eq!(0.0f32, f32::MIN_POSITIVE);
assert_abs_diff_eq!(-f32::MIN_POSITIVE, 0.0f32);
assert_abs_diff_eq!(0.0f32, -f32::MIN_POSITIVE);
assert_abs_diff_ne!(0.000001f32, -f32::MIN_POSITIVE);
assert_abs_diff_ne!(0.000001f32, f32::MIN_POSITIVE);
assert_abs_diff_ne!(f32::MIN_POSITIVE, 0.000001f32);
assert_abs_diff_ne!(-f32::MIN_POSITIVE, 0.000001f32);
}
}
#[cfg(test)]
mod test_f64 {
use std::f64;
#[test]
fn test_basic() {
assert_abs_diff_eq!(1.0f64, 1.0f64);
assert_abs_diff_ne!(1.0f64, 2.0f64);
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_abs_diff_eq!(1.0f64, 2.0f64);
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_abs_diff_ne!(1.0f64, 1.0f64);
}
#[test]
fn test_big() {
assert_abs_diff_eq!(10000000000000000.0f64, 10000000000000001.0f64);
assert_abs_diff_eq!(10000000000000001.0f64, 10000000000000000.0f64);
assert_abs_diff_ne!(1000000000000000.0f64, 1000000000000001.0f64);
assert_abs_diff_ne!(1000000000000001.0f64, 1000000000000000.0f64);
}
#[test]
fn test_big_neg() {
assert_abs_diff_eq!(-10000000000000000.0f64, -10000000000000001.0f64);
assert_abs_diff_eq!(-10000000000000001.0f64, -10000000000000000.0f64);
assert_abs_diff_ne!(-1000000000000000.0f64, -1000000000000001.0f64);
assert_abs_diff_ne!(-1000000000000001.0f64, -1000000000000000.0f64);
}
#[test]
fn test_mid() {
assert_abs_diff_eq!(1.0000000000000001f64, 1.0000000000000002f64);
assert_abs_diff_eq!(1.0000000000000002f64, 1.0000000000000001f64);
assert_abs_diff_ne!(1.000000000000001f64, 1.000000000000002f64);
assert_abs_diff_ne!(1.000000000000002f64, 1.000000000000001f64);
}
#[test]
fn test_mid_neg() {
assert_abs_diff_eq!(-1.0000000000000001f64, -1.0000000000000002f64);
assert_abs_diff_eq!(-1.0000000000000002f64, -1.0000000000000001f64);
assert_abs_diff_ne!(-1.000000000000001f64, -1.000000000000002f64);
assert_abs_diff_ne!(-1.000000000000002f64, -1.000000000000001f64);
}
#[test]
fn test_small() {
assert_abs_diff_eq!(0.0000000100000001f64, 0.0000000100000002f64);
assert_abs_diff_eq!(0.0000000100000002f64, 0.0000000100000001f64);
assert_abs_diff_ne!(0.0000000100000001f64, 0.0000000010000002f64);
assert_abs_diff_ne!(0.0000000100000002f64, 0.0000000010000001f64);
}
#[test]
fn test_small_neg() {
assert_abs_diff_eq!(-0.0000000100000001f64, -0.0000000100000002f64);
assert_abs_diff_eq!(-0.0000000100000002f64, -0.0000000100000001f64);
assert_abs_diff_ne!(-0.0000000100000001f64, -0.0000000010000002f64);
assert_abs_diff_ne!(-0.0000000100000002f64, -0.0000000010000001f64);
}
#[test]
fn test_zero() {
assert_abs_diff_eq!(0.0f64, 0.0f64);
assert_abs_diff_eq!(0.0f64, -0.0f64);
assert_abs_diff_eq!(-0.0f64, -0.0f64);
assert_abs_diff_ne!(0.000000000000001f64, 0.0f64);
assert_abs_diff_ne!(0.0f64, 0.000000000000001f64);
assert_abs_diff_ne!(-0.000000000000001f64, 0.0f64);
assert_abs_diff_ne!(0.0f64, -0.000000000000001f64);
}
#[test]
fn test_epsilon() {
assert_abs_diff_eq!(0.0f64, 1e-40f64, epsilon = 1e-40f64);
assert_abs_diff_eq!(1e-40f64, 0.0f64, epsilon = 1e-40f64);
assert_abs_diff_eq!(0.0f64, -1e-40f64, epsilon = 1e-40f64);
assert_abs_diff_eq!(-1e-40f64, 0.0f64, epsilon = 1e-40f64);
assert_abs_diff_ne!(1e-40f64, 0.0f64, epsilon = 1e-41f64);
assert_abs_diff_ne!(0.0f64, 1e-40f64, epsilon = 1e-41f64);
assert_abs_diff_ne!(-1e-40f64, 0.0f64, epsilon = 1e-41f64);
assert_abs_diff_ne!(0.0f64, -1e-40f64, epsilon = 1e-41f64);
}
#[test]
fn test_max() {
assert_abs_diff_eq!(f64::MAX, f64::MAX);
assert_abs_diff_ne!(f64::MAX, -f64::MAX);
assert_abs_diff_ne!(-f64::MAX, f64::MAX);
assert_abs_diff_ne!(f64::MAX, f64::MAX / 2.0);
assert_abs_diff_ne!(f64::MAX, -f64::MAX / 2.0);
assert_abs_diff_ne!(-f64::MAX, f64::MAX / 2.0);
}
// NOTE: abs_diff_eq fails as numbers begin to get very large
// #[test]
// fn test_infinity() {
// assert_abs_diff_eq!(f64::INFINITY, f64::INFINITY);
// assert_abs_diff_eq!(f64::NEG_INFINITY, f64::NEG_INFINITY);
// assert_abs_diff_ne!(f64::NEG_INFINITY, f64::INFINITY);
// assert_abs_diff_eq!(f64::INFINITY, f64::MAX);
// assert_abs_diff_eq!(f64::NEG_INFINITY, -f64::MAX);
// }
#[test]
fn test_nan() {
assert_abs_diff_ne!(f64::NAN, f64::NAN);
assert_abs_diff_ne!(f64::NAN, 0.0);
assert_abs_diff_ne!(-0.0, f64::NAN);
assert_abs_diff_ne!(f64::NAN, -0.0);
assert_abs_diff_ne!(0.0, f64::NAN);
assert_abs_diff_ne!(f64::NAN, f64::INFINITY);
assert_abs_diff_ne!(f64::INFINITY, f64::NAN);
assert_abs_diff_ne!(f64::NAN, f64::NEG_INFINITY);
assert_abs_diff_ne!(f64::NEG_INFINITY, f64::NAN);
assert_abs_diff_ne!(f64::NAN, f64::MAX);
assert_abs_diff_ne!(f64::MAX, f64::NAN);
assert_abs_diff_ne!(f64::NAN, -f64::MAX);
assert_abs_diff_ne!(-f64::MAX, f64::NAN);
assert_abs_diff_ne!(f64::NAN, f64::MIN_POSITIVE);
assert_abs_diff_ne!(f64::MIN_POSITIVE, f64::NAN);
assert_abs_diff_ne!(f64::NAN, -f64::MIN_POSITIVE);
assert_abs_diff_ne!(-f64::MIN_POSITIVE, f64::NAN);
}
#[test]
fn test_opposite_signs() {
assert_abs_diff_ne!(1.000000001f64, -1.0f64);
assert_abs_diff_ne!(-1.0f64, 1.000000001f64);
assert_abs_diff_ne!(-1.000000001f64, 1.0f64);
assert_abs_diff_ne!(1.0f64, -1.000000001f64);
assert_abs_diff_eq!(10.0 * f64::MIN_POSITIVE, 10.0 * -f64::MIN_POSITIVE);
}
#[test]
fn test_close_to_zero() {
assert_abs_diff_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE);
assert_abs_diff_eq!(f64::MIN_POSITIVE, -f64::MIN_POSITIVE);
assert_abs_diff_eq!(-f64::MIN_POSITIVE, f64::MIN_POSITIVE);
assert_abs_diff_eq!(f64::MIN_POSITIVE, 0.0f64);
assert_abs_diff_eq!(0.0f64, f64::MIN_POSITIVE);
assert_abs_diff_eq!(-f64::MIN_POSITIVE, 0.0f64);
assert_abs_diff_eq!(0.0f64, -f64::MIN_POSITIVE);
assert_abs_diff_ne!(0.000000000000001f64, -f64::MIN_POSITIVE);
assert_abs_diff_ne!(0.000000000000001f64, f64::MIN_POSITIVE);
assert_abs_diff_ne!(f64::MIN_POSITIVE, 0.000000000000001f64);
assert_abs_diff_ne!(-f64::MIN_POSITIVE, 0.000000000000001f64);
}
}
mod test_ref {
mod test_f32 {
#[test]
fn test_basic() {
assert_abs_diff_eq!(&1.0f32, &1.0f32);
assert_abs_diff_ne!(&1.0f32, &2.0f32);
}
}
mod test_f64 {
#[test]
fn test_basic() {
assert_abs_diff_eq!(&1.0f64, &1.0f64);
assert_abs_diff_ne!(&1.0f64, &2.0f64);
}
}
}
mod test_slice {
mod test_f32 {
#[test]
fn test_basic() {
assert_abs_diff_eq!([1.0f32, 2.0f32][..], [1.0f32, 2.0f32][..]);
assert_abs_diff_ne!([1.0f32, 2.0f32][..], [2.0f32, 1.0f32][..]);
}
}
mod test_f64 {
#[test]
fn test_basic() {
assert_abs_diff_eq!([1.0f64, 2.0f64][..], [1.0f64, 2.0f64][..]);
assert_abs_diff_ne!([1.0f64, 2.0f64][..], [2.0f64, 1.0f64][..]);
}
}
}
#[cfg(feature = "num-complex")]
mod test_complex {
extern crate num_complex;
pub use self::num_complex::Complex;
mod test_f32 {
use super::Complex;
#[test]
fn test_basic() {
assert_abs_diff_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
assert_abs_diff_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_abs_diff_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_abs_diff_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
}
}
mod test_f64 {
use super::Complex;
#[test]
fn test_basic() {
assert_abs_diff_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
assert_abs_diff_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_abs_diff_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_abs_diff_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
}
}
}

View File

@ -0,0 +1,18 @@
extern crate approx;
mod test_macro_import {
use approx::{
assert_abs_diff_eq, assert_abs_diff_ne, assert_relative_eq, assert_relative_ne,
assert_ulps_eq, assert_ulps_ne,
};
#[test]
fn test() {
assert_abs_diff_eq!(1.0f32, 1.0f32);
assert_abs_diff_ne!(1.0f32, 2.0f32);
assert_relative_eq!(1.0f32, 1.0f32);
assert_relative_ne!(1.0f32, 2.0f32);
assert_ulps_eq!(1.0f32, 1.0f32);
assert_ulps_ne!(1.0f32, 2.0f32);
}
}

View File

@ -0,0 +1,98 @@
// Copyright 2015 Brendan Zabarauskas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Macro instantiation tests
#[macro_use]
extern crate approx;
#[test]
fn test_abs_diff_eq() {
let _: bool = abs_diff_eq!(1.0, 1.0);
let _: bool = abs_diff_eq!(1.0, 1.0, epsilon = 1.0);
}
#[test]
fn test_abs_diff_eq_trailing_commas() {
let _: bool = abs_diff_eq!(1.0, 1.0,);
let _: bool = abs_diff_eq!(1.0, 1.0, epsilon = 1.0,);
}
#[test]
fn test_abs_diff_ne() {
let _: bool = abs_diff_ne!(1.0, 1.0);
let _: bool = abs_diff_ne!(1.0, 1.0, epsilon = 1.0);
}
#[test]
fn test_abs_diff_ne_trailing_commas() {
let _: bool = abs_diff_ne!(1.0, 1.0,);
let _: bool = abs_diff_ne!(1.0, 1.0, epsilon = 1.0,);
}
#[test]
fn test_relative_eq() {
let _: bool = relative_eq!(1.0, 1.0);
let _: bool = relative_eq!(1.0, 1.0, epsilon = 1.0);
let _: bool = relative_eq!(1.0, 1.0, max_relative = 1.0);
let _: bool = relative_eq!(1.0, 1.0, epsilon = 1.0, max_relative = 1.0);
}
#[test]
fn test_relative_eq_trailing_commas() {
let _: bool = relative_eq!(1.0, 1.0,);
let _: bool = relative_eq!(1.0, 1.0, epsilon = 1.0, max_relative = 1.0,);
}
#[test]
fn test_relative_ne() {
let _: bool = relative_ne!(1.0, 1.0);
let _: bool = relative_ne!(1.0, 1.0, epsilon = 1.0);
let _: bool = relative_ne!(1.0, 1.0, max_relative = 1.0);
let _: bool = relative_ne!(1.0, 1.0, epsilon = 1.0, max_relative = 1.0);
}
#[test]
fn test_relative_ne_trailing_commas() {
let _: bool = relative_ne!(1.0, 1.0,);
let _: bool = relative_ne!(1.0, 1.0, epsilon = 1.0, max_relative = 1.0,);
}
#[test]
fn test_ulps_eq() {
let _: bool = ulps_eq!(1.0, 1.0);
let _: bool = ulps_eq!(1.0, 1.0, epsilon = 1.0);
let _: bool = ulps_eq!(1.0, 1.0, max_ulps = 1);
let _: bool = ulps_eq!(1.0, 1.0, epsilon = 1.0, max_ulps = 1);
}
#[test]
fn test_ulps_eq_trailing_commas() {
let _: bool = ulps_eq!(1.0, 1.0,);
let _: bool = ulps_eq!(1.0, 1.0, epsilon = 1.0, max_ulps = 1,);
}
#[test]
fn test_ulps_ne() {
let _: bool = ulps_ne!(1.0, 1.0);
let _: bool = ulps_ne!(1.0, 1.0, epsilon = 1.0);
let _: bool = ulps_ne!(1.0, 1.0, max_ulps = 1);
let _: bool = ulps_ne!(1.0, 1.0, epsilon = 1.0, max_ulps = 1);
}
#[test]
fn test_ulps_ne_trailing_commas() {
let _: bool = ulps_ne!(1.0, 1.0,);
let _: bool = ulps_ne!(1.0, 1.0, epsilon = 1.0, max_ulps = 1,);
}

View File

@ -0,0 +1,440 @@
// Copyright 2015 Brendan Zabarauskas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Test cases derived from:
// https://github.com/Pybonacci/puntoflotante.org/blob/master/content/errors/NearlyEqualsTest.java
#[macro_use]
extern crate approx;
mod test_f32 {
use std::f32;
#[test]
fn test_basic() {
assert_relative_eq!(1.0f32, 1.0f32);
assert_relative_ne!(1.0f32, 2.0f32);
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_relative_eq!(1.0f32, 2.0f32);
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_relative_ne!(1.0f32, 1.0f32);
}
#[test]
fn test_big() {
assert_relative_eq!(100000000.0f32, 100000001.0f32);
assert_relative_eq!(100000001.0f32, 100000000.0f32);
assert_relative_ne!(10000.0f32, 10001.0f32);
assert_relative_ne!(10001.0f32, 10000.0f32);
}
#[test]
fn test_big_neg() {
assert_relative_eq!(-100000000.0f32, -100000001.0f32);
assert_relative_eq!(-100000001.0f32, -100000000.0f32);
assert_relative_ne!(-10000.0f32, -10001.0f32);
assert_relative_ne!(-10001.0f32, -10000.0f32);
}
#[test]
fn test_mid() {
assert_relative_eq!(1.0000001f32, 1.0000002f32);
assert_relative_eq!(1.0000002f32, 1.0000001f32);
assert_relative_ne!(1.000001f32, 1.000002f32);
assert_relative_ne!(1.000002f32, 1.000001f32);
}
#[test]
fn test_mid_neg() {
assert_relative_eq!(-1.0000001f32, -1.0000002f32);
assert_relative_eq!(-1.0000002f32, -1.0000001f32);
assert_relative_ne!(-1.000001f32, -1.000002f32);
assert_relative_ne!(-1.000002f32, -1.000001f32);
}
#[test]
fn test_small() {
assert_relative_eq!(0.000010001f32, 0.000010002f32);
assert_relative_eq!(0.000010002f32, 0.000010001f32);
assert_relative_ne!(0.000001002f32, 0.0000001001f32);
assert_relative_ne!(0.000001001f32, 0.0000001002f32);
}
#[test]
fn test_small_neg() {
assert_relative_eq!(-0.000010001f32, -0.000010002f32);
assert_relative_eq!(-0.000010002f32, -0.000010001f32);
assert_relative_ne!(-0.000001002f32, -0.0000001001f32);
assert_relative_ne!(-0.000001001f32, -0.0000001002f32);
}
#[test]
fn test_zero() {
assert_relative_eq!(0.0f32, 0.0f32);
assert_relative_eq!(0.0f32, -0.0f32);
assert_relative_eq!(-0.0f32, -0.0f32);
assert_relative_ne!(0.000001f32, 0.0f32);
assert_relative_ne!(0.0f32, 0.000001f32);
assert_relative_ne!(-0.000001f32, 0.0f32);
assert_relative_ne!(0.0f32, -0.000001f32);
}
#[test]
fn test_epsilon() {
assert_relative_eq!(0.0f32, 1e-40f32, epsilon = 1e-40f32);
assert_relative_eq!(1e-40f32, 0.0f32, epsilon = 1e-40f32);
assert_relative_eq!(0.0f32, -1e-40f32, epsilon = 1e-40f32);
assert_relative_eq!(-1e-40f32, 0.0f32, epsilon = 1e-40f32);
assert_relative_ne!(1e-40f32, 0.0f32, epsilon = 1e-41f32);
assert_relative_ne!(0.0f32, 1e-40f32, epsilon = 1e-41f32);
assert_relative_ne!(-1e-40f32, 0.0f32, epsilon = 1e-41f32);
assert_relative_ne!(0.0f32, -1e-40f32, epsilon = 1e-41f32);
}
#[test]
fn test_max() {
assert_relative_eq!(f32::MAX, f32::MAX);
assert_relative_ne!(f32::MAX, -f32::MAX);
assert_relative_ne!(-f32::MAX, f32::MAX);
assert_relative_ne!(f32::MAX, f32::MAX / 2.0);
assert_relative_ne!(f32::MAX, -f32::MAX / 2.0);
assert_relative_ne!(-f32::MAX, f32::MAX / 2.0);
}
#[test]
fn test_infinity() {
assert_relative_eq!(f32::INFINITY, f32::INFINITY);
assert_relative_eq!(f32::NEG_INFINITY, f32::NEG_INFINITY);
assert_relative_ne!(f32::NEG_INFINITY, f32::INFINITY);
}
#[test]
fn test_zero_infinity() {
assert_relative_ne!(0f32, f32::INFINITY);
assert_relative_ne!(0f32, f32::NEG_INFINITY);
}
#[test]
fn test_nan() {
assert_relative_ne!(f32::NAN, f32::NAN);
assert_relative_ne!(f32::NAN, 0.0);
assert_relative_ne!(-0.0, f32::NAN);
assert_relative_ne!(f32::NAN, -0.0);
assert_relative_ne!(0.0, f32::NAN);
assert_relative_ne!(f32::NAN, f32::INFINITY);
assert_relative_ne!(f32::INFINITY, f32::NAN);
assert_relative_ne!(f32::NAN, f32::NEG_INFINITY);
assert_relative_ne!(f32::NEG_INFINITY, f32::NAN);
assert_relative_ne!(f32::NAN, f32::MAX);
assert_relative_ne!(f32::MAX, f32::NAN);
assert_relative_ne!(f32::NAN, -f32::MAX);
assert_relative_ne!(-f32::MAX, f32::NAN);
assert_relative_ne!(f32::NAN, f32::MIN_POSITIVE);
assert_relative_ne!(f32::MIN_POSITIVE, f32::NAN);
assert_relative_ne!(f32::NAN, -f32::MIN_POSITIVE);
assert_relative_ne!(-f32::MIN_POSITIVE, f32::NAN);
}
#[test]
fn test_opposite_signs() {
assert_relative_ne!(1.000000001f32, -1.0f32);
assert_relative_ne!(-1.0f32, 1.000000001f32);
assert_relative_ne!(-1.000000001f32, 1.0f32);
assert_relative_ne!(1.0f32, -1.000000001f32);
assert_relative_eq!(10.0 * f32::MIN_POSITIVE, 10.0 * -f32::MIN_POSITIVE);
}
#[test]
fn test_close_to_zero() {
assert_relative_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE);
assert_relative_eq!(f32::MIN_POSITIVE, -f32::MIN_POSITIVE);
assert_relative_eq!(-f32::MIN_POSITIVE, f32::MIN_POSITIVE);
assert_relative_eq!(f32::MIN_POSITIVE, 0.0f32);
assert_relative_eq!(0.0f32, f32::MIN_POSITIVE);
assert_relative_eq!(-f32::MIN_POSITIVE, 0.0f32);
assert_relative_eq!(0.0f32, -f32::MIN_POSITIVE);
assert_relative_ne!(0.000001f32, -f32::MIN_POSITIVE);
assert_relative_ne!(0.000001f32, f32::MIN_POSITIVE);
assert_relative_ne!(f32::MIN_POSITIVE, 0.000001f32);
assert_relative_ne!(-f32::MIN_POSITIVE, 0.000001f32);
}
}
#[cfg(test)]
mod test_f64 {
use std::f64;
#[test]
fn test_basic() {
assert_relative_eq!(1.0f64, 1.0f64);
assert_relative_ne!(1.0f64, 2.0f64);
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_relative_eq!(1.0f64, 2.0f64);
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_relative_ne!(1.0f64, 1.0f64);
}
#[test]
fn test_big() {
assert_relative_eq!(10000000000000000.0f64, 10000000000000001.0f64);
assert_relative_eq!(10000000000000001.0f64, 10000000000000000.0f64);
assert_relative_ne!(1000000000000000.0f64, 1000000000000001.0f64);
assert_relative_ne!(1000000000000001.0f64, 1000000000000000.0f64);
}
#[test]
fn test_big_neg() {
assert_relative_eq!(-10000000000000000.0f64, -10000000000000001.0f64);
assert_relative_eq!(-10000000000000001.0f64, -10000000000000000.0f64);
assert_relative_ne!(-1000000000000000.0f64, -1000000000000001.0f64);
assert_relative_ne!(-1000000000000001.0f64, -1000000000000000.0f64);
}
#[test]
fn test_mid() {
assert_relative_eq!(1.0000000000000001f64, 1.0000000000000002f64);
assert_relative_eq!(1.0000000000000002f64, 1.0000000000000001f64);
assert_relative_ne!(1.000000000000001f64, 1.000000000000002f64);
assert_relative_ne!(1.000000000000002f64, 1.000000000000001f64);
}
#[test]
fn test_mid_neg() {
assert_relative_eq!(-1.0000000000000001f64, -1.0000000000000002f64);
assert_relative_eq!(-1.0000000000000002f64, -1.0000000000000001f64);
assert_relative_ne!(-1.000000000000001f64, -1.000000000000002f64);
assert_relative_ne!(-1.000000000000002f64, -1.000000000000001f64);
}
#[test]
fn test_small() {
assert_relative_eq!(0.0000000100000001f64, 0.0000000100000002f64);
assert_relative_eq!(0.0000000100000002f64, 0.0000000100000001f64);
assert_relative_ne!(0.0000000100000001f64, 0.0000000010000002f64);
assert_relative_ne!(0.0000000100000002f64, 0.0000000010000001f64);
}
#[test]
fn test_small_neg() {
assert_relative_eq!(-0.0000000100000001f64, -0.0000000100000002f64);
assert_relative_eq!(-0.0000000100000002f64, -0.0000000100000001f64);
assert_relative_ne!(-0.0000000100000001f64, -0.0000000010000002f64);
assert_relative_ne!(-0.0000000100000002f64, -0.0000000010000001f64);
}
#[test]
fn test_zero() {
assert_relative_eq!(0.0f64, 0.0f64);
assert_relative_eq!(0.0f64, -0.0f64);
assert_relative_eq!(-0.0f64, -0.0f64);
assert_relative_ne!(0.000000000000001f64, 0.0f64);
assert_relative_ne!(0.0f64, 0.000000000000001f64);
assert_relative_ne!(-0.000000000000001f64, 0.0f64);
assert_relative_ne!(0.0f64, -0.000000000000001f64);
}
#[test]
fn test_epsilon() {
assert_relative_eq!(0.0f64, 1e-40f64, epsilon = 1e-40f64);
assert_relative_eq!(1e-40f64, 0.0f64, epsilon = 1e-40f64);
assert_relative_eq!(0.0f64, -1e-40f64, epsilon = 1e-40f64);
assert_relative_eq!(-1e-40f64, 0.0f64, epsilon = 1e-40f64);
assert_relative_ne!(1e-40f64, 0.0f64, epsilon = 1e-41f64);
assert_relative_ne!(0.0f64, 1e-40f64, epsilon = 1e-41f64);
assert_relative_ne!(-1e-40f64, 0.0f64, epsilon = 1e-41f64);
assert_relative_ne!(0.0f64, -1e-40f64, epsilon = 1e-41f64);
}
#[test]
fn test_max() {
assert_relative_eq!(f64::MAX, f64::MAX);
assert_relative_ne!(f64::MAX, -f64::MAX);
assert_relative_ne!(-f64::MAX, f64::MAX);
assert_relative_ne!(f64::MAX, f64::MAX / 2.0);
assert_relative_ne!(f64::MAX, -f64::MAX / 2.0);
assert_relative_ne!(-f64::MAX, f64::MAX / 2.0);
}
#[test]
fn test_infinity() {
assert_relative_eq!(f64::INFINITY, f64::INFINITY);
assert_relative_eq!(f64::NEG_INFINITY, f64::NEG_INFINITY);
assert_relative_ne!(f64::NEG_INFINITY, f64::INFINITY);
}
#[test]
fn test_nan() {
assert_relative_ne!(f64::NAN, f64::NAN);
assert_relative_ne!(f64::NAN, 0.0);
assert_relative_ne!(-0.0, f64::NAN);
assert_relative_ne!(f64::NAN, -0.0);
assert_relative_ne!(0.0, f64::NAN);
assert_relative_ne!(f64::NAN, f64::INFINITY);
assert_relative_ne!(f64::INFINITY, f64::NAN);
assert_relative_ne!(f64::NAN, f64::NEG_INFINITY);
assert_relative_ne!(f64::NEG_INFINITY, f64::NAN);
assert_relative_ne!(f64::NAN, f64::MAX);
assert_relative_ne!(f64::MAX, f64::NAN);
assert_relative_ne!(f64::NAN, -f64::MAX);
assert_relative_ne!(-f64::MAX, f64::NAN);
assert_relative_ne!(f64::NAN, f64::MIN_POSITIVE);
assert_relative_ne!(f64::MIN_POSITIVE, f64::NAN);
assert_relative_ne!(f64::NAN, -f64::MIN_POSITIVE);
assert_relative_ne!(-f64::MIN_POSITIVE, f64::NAN);
}
#[test]
fn test_opposite_signs() {
assert_relative_ne!(1.000000001f64, -1.0f64);
assert_relative_ne!(-1.0f64, 1.000000001f64);
assert_relative_ne!(-1.000000001f64, 1.0f64);
assert_relative_ne!(1.0f64, -1.000000001f64);
assert_relative_eq!(10.0 * f64::MIN_POSITIVE, 10.0 * -f64::MIN_POSITIVE);
}
#[test]
fn test_close_to_zero() {
assert_relative_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE);
assert_relative_eq!(f64::MIN_POSITIVE, -f64::MIN_POSITIVE);
assert_relative_eq!(-f64::MIN_POSITIVE, f64::MIN_POSITIVE);
assert_relative_eq!(f64::MIN_POSITIVE, 0.0f64);
assert_relative_eq!(0.0f64, f64::MIN_POSITIVE);
assert_relative_eq!(-f64::MIN_POSITIVE, 0.0f64);
assert_relative_eq!(0.0f64, -f64::MIN_POSITIVE);
assert_relative_ne!(0.000000000000001f64, -f64::MIN_POSITIVE);
assert_relative_ne!(0.000000000000001f64, f64::MIN_POSITIVE);
assert_relative_ne!(f64::MIN_POSITIVE, 0.000000000000001f64);
assert_relative_ne!(-f64::MIN_POSITIVE, 0.000000000000001f64);
}
}
mod test_ref {
mod test_f32 {
#[test]
fn test_basic() {
assert_relative_eq!(&1.0f32, &1.0f32);
assert_relative_ne!(&1.0f32, &2.0f32);
}
}
mod test_f64 {
#[test]
fn test_basic() {
assert_relative_eq!(&1.0f64, &1.0f64);
assert_relative_ne!(&1.0f64, &2.0f64);
}
}
}
mod test_slice {
mod test_f32 {
#[test]
fn test_basic() {
assert_relative_eq!([1.0f32, 2.0f32][..], [1.0f32, 2.0f32][..]);
assert_relative_ne!([1.0f32, 2.0f32][..], [2.0f32, 1.0f32][..]);
}
}
mod test_f64 {
#[test]
fn test_basic() {
assert_relative_eq!([1.0f64, 2.0f64][..], [1.0f64, 2.0f64][..]);
assert_relative_ne!([1.0f64, 2.0f64][..], [2.0f64, 1.0f64][..]);
}
}
}
#[cfg(feature = "num-complex")]
mod test_complex {
extern crate num_complex;
pub use self::num_complex::Complex;
mod test_f32 {
use super::Complex;
#[test]
fn test_basic() {
assert_relative_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
assert_relative_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_relative_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_relative_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
}
}
mod test_f64 {
use super::Complex;
#[test]
fn test_basic() {
assert_relative_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
assert_relative_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_relative_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_relative_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
}
}
}

View File

@ -0,0 +1,438 @@
// Copyright 2015 Brendan Zabarauskas
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Test cases derived from:
// https://github.com/Pybonacci/puntoflotante.org/blob/master/content/errors/NearlyEqualsTest.java
#[macro_use]
extern crate approx;
mod test_f32 {
use std::f32;
#[test]
fn test_basic() {
assert_ulps_eq!(1.0f32, 1.0f32);
assert_ulps_ne!(1.0f32, 2.0f32);
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_ulps_eq!(1.0f32, 2.0f32);
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_ulps_ne!(1.0f32, 1.0f32);
}
#[test]
fn test_big() {
assert_ulps_eq!(100000000.0f32, 100000001.0f32);
assert_ulps_eq!(100000001.0f32, 100000000.0f32);
assert_ulps_ne!(10000.0f32, 10001.0f32);
assert_ulps_ne!(10001.0f32, 10000.0f32);
}
#[test]
fn test_big_neg() {
assert_ulps_eq!(-100000000.0f32, -100000001.0f32);
assert_ulps_eq!(-100000001.0f32, -100000000.0f32);
assert_ulps_ne!(-10000.0f32, -10001.0f32);
assert_ulps_ne!(-10001.0f32, -10000.0f32);
}
#[test]
fn test_mid() {
assert_ulps_eq!(1.0000001f32, 1.0000002f32);
assert_ulps_eq!(1.0000002f32, 1.0000001f32);
assert_ulps_ne!(1.000001f32, 1.000002f32);
assert_ulps_ne!(1.000002f32, 1.000001f32);
}
#[test]
fn test_mid_neg() {
assert_ulps_eq!(-1.0000001f32, -1.0000002f32);
assert_ulps_eq!(-1.0000002f32, -1.0000001f32);
assert_ulps_ne!(-1.000001f32, -1.000002f32);
assert_ulps_ne!(-1.000002f32, -1.000001f32);
}
#[test]
fn test_small() {
assert_ulps_eq!(0.000010001f32, 0.000010002f32);
assert_ulps_eq!(0.000010002f32, 0.000010001f32);
assert_ulps_ne!(0.000001002f32, 0.0000001001f32);
assert_ulps_ne!(0.000001001f32, 0.0000001002f32);
}
#[test]
fn test_small_neg() {
assert_ulps_eq!(-0.000010001f32, -0.000010002f32);
assert_ulps_eq!(-0.000010002f32, -0.000010001f32);
assert_ulps_ne!(-0.000001002f32, -0.0000001001f32);
assert_ulps_ne!(-0.000001001f32, -0.0000001002f32);
}
#[test]
fn test_zero() {
assert_ulps_eq!(0.0f32, 0.0f32);
assert_ulps_eq!(0.0f32, -0.0f32);
assert_ulps_eq!(-0.0f32, -0.0f32);
assert_ulps_ne!(0.000001f32, 0.0f32);
assert_ulps_ne!(0.0f32, 0.000001f32);
assert_ulps_ne!(-0.000001f32, 0.0f32);
assert_ulps_ne!(0.0f32, -0.000001f32);
}
#[test]
fn test_epsilon() {
assert_ulps_eq!(0.0f32, 1e-40f32, epsilon = 1e-40f32);
assert_ulps_eq!(1e-40f32, 0.0f32, epsilon = 1e-40f32);
assert_ulps_eq!(0.0f32, -1e-40f32, epsilon = 1e-40f32);
assert_ulps_eq!(-1e-40f32, 0.0f32, epsilon = 1e-40f32);
assert_ulps_ne!(1e-40f32, 0.0f32, epsilon = 1e-41f32);
assert_ulps_ne!(0.0f32, 1e-40f32, epsilon = 1e-41f32);
assert_ulps_ne!(-1e-40f32, 0.0f32, epsilon = 1e-41f32);
assert_ulps_ne!(0.0f32, -1e-40f32, epsilon = 1e-41f32);
}
#[test]
fn test_max() {
assert_ulps_eq!(f32::MAX, f32::MAX);
assert_ulps_ne!(f32::MAX, -f32::MAX);
assert_ulps_ne!(-f32::MAX, f32::MAX);
assert_ulps_ne!(f32::MAX, f32::MAX / 2.0);
assert_ulps_ne!(f32::MAX, -f32::MAX / 2.0);
assert_ulps_ne!(-f32::MAX, f32::MAX / 2.0);
}
#[test]
fn test_infinity() {
assert_ulps_eq!(f32::INFINITY, f32::INFINITY);
assert_ulps_eq!(f32::NEG_INFINITY, f32::NEG_INFINITY);
assert_ulps_ne!(f32::NEG_INFINITY, f32::INFINITY);
assert_ulps_eq!(f32::INFINITY, f32::MAX);
assert_ulps_eq!(f32::NEG_INFINITY, -f32::MAX);
}
#[test]
fn test_nan() {
assert_ulps_ne!(f32::NAN, f32::NAN);
assert_ulps_ne!(f32::NAN, 0.0);
assert_ulps_ne!(-0.0, f32::NAN);
assert_ulps_ne!(f32::NAN, -0.0);
assert_ulps_ne!(0.0, f32::NAN);
assert_ulps_ne!(f32::NAN, f32::INFINITY);
assert_ulps_ne!(f32::INFINITY, f32::NAN);
assert_ulps_ne!(f32::NAN, f32::NEG_INFINITY);
assert_ulps_ne!(f32::NEG_INFINITY, f32::NAN);
assert_ulps_ne!(f32::NAN, f32::MAX);
assert_ulps_ne!(f32::MAX, f32::NAN);
assert_ulps_ne!(f32::NAN, -f32::MAX);
assert_ulps_ne!(-f32::MAX, f32::NAN);
assert_ulps_ne!(f32::NAN, f32::MIN_POSITIVE);
assert_ulps_ne!(f32::MIN_POSITIVE, f32::NAN);
assert_ulps_ne!(f32::NAN, -f32::MIN_POSITIVE);
assert_ulps_ne!(-f32::MIN_POSITIVE, f32::NAN);
}
#[test]
fn test_opposite_signs() {
assert_ulps_ne!(1.000000001f32, -1.0f32);
assert_ulps_ne!(-1.0f32, 1.000000001f32);
assert_ulps_ne!(-1.000000001f32, 1.0f32);
assert_ulps_ne!(1.0f32, -1.000000001f32);
assert_ulps_eq!(10.0 * f32::MIN_POSITIVE, 10.0 * -f32::MIN_POSITIVE);
}
#[test]
fn test_close_to_zero() {
assert_ulps_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE);
assert_ulps_eq!(f32::MIN_POSITIVE, -f32::MIN_POSITIVE);
assert_ulps_eq!(-f32::MIN_POSITIVE, f32::MIN_POSITIVE);
assert_ulps_eq!(f32::MIN_POSITIVE, 0.0f32);
assert_ulps_eq!(0.0f32, f32::MIN_POSITIVE);
assert_ulps_eq!(-f32::MIN_POSITIVE, 0.0f32);
assert_ulps_eq!(0.0f32, -f32::MIN_POSITIVE);
assert_ulps_ne!(0.000001f32, -f32::MIN_POSITIVE);
assert_ulps_ne!(0.000001f32, f32::MIN_POSITIVE);
assert_ulps_ne!(f32::MIN_POSITIVE, 0.000001f32);
assert_ulps_ne!(-f32::MIN_POSITIVE, 0.000001f32);
}
}
#[cfg(test)]
mod test_f64 {
use std::f64;
#[test]
fn test_basic() {
assert_ulps_eq!(1.0f64, 1.0f64);
assert_ulps_ne!(1.0f64, 2.0f64);
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_ulps_eq!(1.0f64, 2.0f64);
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_ulps_ne!(1.0f64, 1.0f64);
}
#[test]
fn test_big() {
assert_ulps_eq!(10000000000000000.0f64, 10000000000000001.0f64);
assert_ulps_eq!(10000000000000001.0f64, 10000000000000000.0f64);
assert_ulps_ne!(1000000000000000.0f64, 1000000000000001.0f64);
assert_ulps_ne!(1000000000000001.0f64, 1000000000000000.0f64);
}
#[test]
fn test_big_neg() {
assert_ulps_eq!(-10000000000000000.0f64, -10000000000000001.0f64);
assert_ulps_eq!(-10000000000000001.0f64, -10000000000000000.0f64);
assert_ulps_ne!(-1000000000000000.0f64, -1000000000000001.0f64);
assert_ulps_ne!(-1000000000000001.0f64, -1000000000000000.0f64);
}
#[test]
fn test_mid() {
assert_ulps_eq!(1.0000000000000001f64, 1.0000000000000002f64);
assert_ulps_eq!(1.0000000000000002f64, 1.0000000000000001f64);
assert_ulps_ne!(1.000000000000001f64, 1.0000000000000022f64);
assert_ulps_ne!(1.0000000000000022f64, 1.000000000000001f64);
}
#[test]
fn test_mid_neg() {
assert_ulps_eq!(-1.0000000000000001f64, -1.0000000000000002f64);
assert_ulps_eq!(-1.0000000000000002f64, -1.0000000000000001f64);
assert_ulps_ne!(-1.000000000000001f64, -1.0000000000000022f64);
assert_ulps_ne!(-1.0000000000000022f64, -1.000000000000001f64);
}
#[test]
fn test_small() {
assert_ulps_eq!(0.0000000100000001f64, 0.0000000100000002f64);
assert_ulps_eq!(0.0000000100000002f64, 0.0000000100000001f64);
assert_ulps_ne!(0.0000000100000001f64, 0.0000000010000002f64);
assert_ulps_ne!(0.0000000100000002f64, 0.0000000010000001f64);
}
#[test]
fn test_small_neg() {
assert_ulps_eq!(-0.0000000100000001f64, -0.0000000100000002f64);
assert_ulps_eq!(-0.0000000100000002f64, -0.0000000100000001f64);
assert_ulps_ne!(-0.0000000100000001f64, -0.0000000010000002f64);
assert_ulps_ne!(-0.0000000100000002f64, -0.0000000010000001f64);
}
#[test]
fn test_zero() {
assert_ulps_eq!(0.0f64, 0.0f64);
assert_ulps_eq!(0.0f64, -0.0f64);
assert_ulps_eq!(-0.0f64, -0.0f64);
assert_ulps_ne!(0.000000000000001f64, 0.0f64);
assert_ulps_ne!(0.0f64, 0.000000000000001f64);
assert_ulps_ne!(-0.000000000000001f64, 0.0f64);
assert_ulps_ne!(0.0f64, -0.000000000000001f64);
}
#[test]
fn test_epsilon() {
assert_ulps_eq!(0.0f64, 1e-40f64, epsilon = 1e-40f64);
assert_ulps_eq!(1e-40f64, 0.0f64, epsilon = 1e-40f64);
assert_ulps_eq!(0.0f64, -1e-40f64, epsilon = 1e-40f64);
assert_ulps_eq!(-1e-40f64, 0.0f64, epsilon = 1e-40f64);
assert_ulps_ne!(1e-40f64, 0.0f64, epsilon = 1e-41f64);
assert_ulps_ne!(0.0f64, 1e-40f64, epsilon = 1e-41f64);
assert_ulps_ne!(-1e-40f64, 0.0f64, epsilon = 1e-41f64);
assert_ulps_ne!(0.0f64, -1e-40f64, epsilon = 1e-41f64);
}
#[test]
fn test_max() {
assert_ulps_eq!(f64::MAX, f64::MAX);
assert_ulps_ne!(f64::MAX, -f64::MAX);
assert_ulps_ne!(-f64::MAX, f64::MAX);
assert_ulps_ne!(f64::MAX, f64::MAX / 2.0);
assert_ulps_ne!(f64::MAX, -f64::MAX / 2.0);
assert_ulps_ne!(-f64::MAX, f64::MAX / 2.0);
}
#[test]
fn test_infinity() {
assert_ulps_eq!(f64::INFINITY, f64::INFINITY);
assert_ulps_eq!(f64::NEG_INFINITY, f64::NEG_INFINITY);
assert_ulps_ne!(f64::NEG_INFINITY, f64::INFINITY);
assert_ulps_eq!(f64::INFINITY, f64::MAX);
assert_ulps_eq!(f64::NEG_INFINITY, -f64::MAX);
}
#[test]
fn test_nan() {
assert_ulps_ne!(f64::NAN, f64::NAN);
assert_ulps_ne!(f64::NAN, 0.0);
assert_ulps_ne!(-0.0, f64::NAN);
assert_ulps_ne!(f64::NAN, -0.0);
assert_ulps_ne!(0.0, f64::NAN);
assert_ulps_ne!(f64::NAN, f64::INFINITY);
assert_ulps_ne!(f64::INFINITY, f64::NAN);
assert_ulps_ne!(f64::NAN, f64::NEG_INFINITY);
assert_ulps_ne!(f64::NEG_INFINITY, f64::NAN);
assert_ulps_ne!(f64::NAN, f64::MAX);
assert_ulps_ne!(f64::MAX, f64::NAN);
assert_ulps_ne!(f64::NAN, -f64::MAX);
assert_ulps_ne!(-f64::MAX, f64::NAN);
assert_ulps_ne!(f64::NAN, f64::MIN_POSITIVE);
assert_ulps_ne!(f64::MIN_POSITIVE, f64::NAN);
assert_ulps_ne!(f64::NAN, -f64::MIN_POSITIVE);
assert_ulps_ne!(-f64::MIN_POSITIVE, f64::NAN);
}
#[test]
fn test_opposite_signs() {
assert_ulps_ne!(1.000000001f64, -1.0f64);
assert_ulps_ne!(-1.0f64, 1.000000001f64);
assert_ulps_ne!(-1.000000001f64, 1.0f64);
assert_ulps_ne!(1.0f64, -1.000000001f64);
assert_ulps_eq!(10.0 * f64::MIN_POSITIVE, 10.0 * -f64::MIN_POSITIVE);
}
#[test]
fn test_close_to_zero() {
assert_ulps_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE);
assert_ulps_eq!(f64::MIN_POSITIVE, -f64::MIN_POSITIVE);
assert_ulps_eq!(-f64::MIN_POSITIVE, f64::MIN_POSITIVE);
assert_ulps_eq!(f64::MIN_POSITIVE, 0.0f64);
assert_ulps_eq!(0.0f64, f64::MIN_POSITIVE);
assert_ulps_eq!(-f64::MIN_POSITIVE, 0.0f64);
assert_ulps_eq!(0.0f64, -f64::MIN_POSITIVE);
assert_ulps_ne!(0.000000000000001f64, -f64::MIN_POSITIVE);
assert_ulps_ne!(0.000000000000001f64, f64::MIN_POSITIVE);
assert_ulps_ne!(f64::MIN_POSITIVE, 0.000000000000001f64);
assert_ulps_ne!(-f64::MIN_POSITIVE, 0.000000000000001f64);
}
}
mod test_ref {
mod test_f32 {
#[test]
fn test_basic() {
assert_ulps_eq!(&1.0f32, &1.0f32);
assert_ulps_ne!(&1.0f32, &2.0f32);
}
}
mod test_f64 {
#[test]
fn test_basic() {
assert_ulps_eq!(&1.0f64, &1.0f64);
assert_ulps_ne!(&1.0f64, &2.0f64);
}
}
}
mod test_slice {
mod test_f32 {
#[test]
fn test_basic() {
assert_ulps_eq!([1.0f32, 2.0f32][..], [1.0f32, 2.0f32][..]);
assert_ulps_ne!([1.0f32, 2.0f32][..], [2.0f32, 1.0f32][..]);
}
}
mod test_f64 {
#[test]
fn test_basic() {
assert_ulps_eq!([1.0f64, 2.0f64][..], [1.0f64, 2.0f64][..]);
assert_ulps_ne!([1.0f64, 2.0f64][..], [2.0f64, 1.0f64][..]);
}
}
}
#[cfg(feature = "num-complex")]
mod test_complex {
extern crate num_complex;
pub use self::num_complex::Complex;
mod test_f32 {
use super::Complex;
#[test]
fn test_basic() {
assert_ulps_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
assert_ulps_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_ulps_eq!(Complex::new(1.0f32, 2.0f32), Complex::new(2.0f32, 1.0f32));
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_ulps_ne!(Complex::new(1.0f32, 2.0f32), Complex::new(1.0f32, 2.0f32));
}
}
mod test_f64 {
use super::Complex;
#[test]
fn test_basic() {
assert_ulps_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
assert_ulps_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
}
#[test]
#[should_panic]
fn test_basic_panic_eq() {
assert_ulps_eq!(Complex::new(1.0f64, 2.0f64), Complex::new(2.0f64, 1.0f64));
}
#[test]
#[should_panic]
fn test_basic_panic_ne() {
assert_ulps_ne!(Complex::new(1.0f64, 2.0f64), Complex::new(1.0f64, 2.0f64));
}
}
}

View File

@ -0,0 +1 @@
{"files":{"Cargo.toml":"adb58f911fb48a1a3828bda514c2da5ce5650e8e98eb7217f014b1720fe11d38","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0245ee104228a100ce5fceecf43e25faae450494d9173f43fd94c27d69fdac13","README.rst":"a3cff166a7b622233cd9ccfbe472f910267629f4b41e387ad8573fc06eac37d6","benches/arraystring.rs":"f12b890977117ebde4ca42bcd6b91f2a6a087f2b235aaca6d15e30d125ae9f67","benches/extend.rs":"8c8f78df7e90b62c7e160cf5ea6c61b90bc4035a9704b6a179a1e01d8fafe2e9","build.rs":"fc29930f06cb4dde58f43d2f30b28c366ca3bafcd7e44b41a1c250d60fa900fb","custom.css":"e6f2cd299392337b4e2959c52f422e5b7be11920ea98d10db44d10ddef5ed47c","src/array.rs":"67fb063ee515bfd4968ede219dff81091a5935ef93529ebd1bb2a716ea3ed3d3","src/array_string.rs":"8a1a4cfc1699e2373815e57dc676a87a30629f91a9e861c866ccc6cb1381eadf","src/char.rs":"64a08f6a743b67bf2c96483f91c2fdaea79f6e91df5cd752f770b16a6b1d5b1e","src/errors.rs":"dde99bffaddfd45396aab7e07642cc018ef5435fe60c4f26a2c05a36555be18c","src/lib.rs":"34167f35d9a5b887e6fb424500bb64764d68d029d0e374827886b05ad4d26bca","src/maybe_uninit.rs":"7cca39ffe0f122716baaa174b433ff5fe9c93560f8e54fc077a0083500eaa1dd","src/maybe_uninit_nodrop.rs":"7fb2e24bf815dd6e1d104056fa9be4a11de7e0f0e5474742af186c580a6b47cc","src/maybe_uninit_stable.rs":"3f7daba622cf5df86992b451b46636a491c9611292f59969eb6890a10a00476d","src/range.rs":"65744ab7def208a1ab155ea2448fe9ea7fc14f33211361b1041f540125b32efd","tests/serde.rs":"ef3986a82656b09f3fbb14358e767051ffabe09592c61e69ea695cb88760e8ba","tests/tests.rs":"8066a4aca7b40356525ed87f7658773e610ef4fce3522b0cc0f301384d880f00"},"package":"cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"}

View File

@ -0,0 +1,48 @@
"""
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # "MIT,Apache-2.0"
])
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_library",
"rust_binary",
"rust_test",
)
# Unsupported target "arraystring" with type "bench" omitted
rust_library(
name = "arrayvec",
crate_root = "src/lib.rs",
crate_type = "lib",
edition = "2015",
srcs = glob(["**/*.rs"]),
deps = [
"//third_party/cargo/vendor/nodrop-0.1.14:nodrop",
],
rustc_flags = [
"--cap-lints=allow",
],
version = "0.4.12",
crate_features = [
],
)
# Unsupported target "build-script-build" with type "custom-build" omitted
# Unsupported target "extend" with type "bench" omitted
# Unsupported target "serde" with type "test" omitted
# Unsupported target "tests" with type "test" omitted

View File

@ -0,0 +1,62 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "arrayvec"
version = "0.4.12"
authors = ["bluss"]
description = "A vector with fixed capacity, backed by an array (it can be stored on the stack too). Implements fixed capacity ArrayVec and ArrayString."
documentation = "https://docs.rs/arrayvec/"
keywords = ["stack", "vector", "array", "data-structure", "no_std"]
categories = ["data-structures", "no-std"]
license = "MIT/Apache-2.0"
repository = "https://github.com/bluss/arrayvec"
[package.metadata.docs.rs]
features = ["serde-1"]
[package.metadata.release]
no-dev-version = true
tag-name = "{{version}}"
[[bench]]
name = "extend"
harness = false
[[bench]]
name = "arraystring"
harness = false
[dependencies.nodrop]
version = "0.1.12"
default-features = false
[dependencies.serde]
version = "1.0"
optional = true
default-features = false
[dev-dependencies.bencher]
version = "0.1.4"
[dev-dependencies.matches]
version = "0.1"
[dev-dependencies.serde_test]
version = "1.0"
[build-dependencies]
[features]
array-sizes-129-255 = []
array-sizes-33-128 = []
default = ["std"]
serde-1 = ["serde"]
std = []
use_union = []

View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,25 @@
Copyright (c) Ulrik Sverdrup "bluss" 2015-2017
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

View File

@ -0,0 +1,224 @@
arrayvec
========
A vector with fixed capacity.
Please read the `API documentation here`__
__ https://docs.rs/arrayvec
|build_status|_ |crates|_ |crates2|_
.. |build_status| image:: https://travis-ci.org/bluss/arrayvec.svg
.. _build_status: https://travis-ci.org/bluss/arrayvec
.. |crates| image:: http://meritbadge.herokuapp.com/arrayvec
.. _crates: https://crates.io/crates/arrayvec
.. |crates2| image:: http://meritbadge.herokuapp.com/nodrop
.. _crates2: https://crates.io/crates/nodrop
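A minimal usage sketch (illustrative only, assuming the crate is available as
the ``arrayvec`` dependency; it mirrors the 0.4 API documented above)::

    use arrayvec::ArrayVec;

    let mut v = ArrayVec::<[i32; 4]>::new();
    v.push(1);
    v.extend(2..5);                  // v is now [1, 2, 3, 4]
    assert!(v.is_full());
    assert!(v.try_push(5).is_err()); // a full vec reports a CapacityError
    assert_eq!(&v[..], &[1, 2, 3, 4]);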
Recent Changes (arrayvec)
-------------------------
- 0.4.12
- Use raw pointers instead of ``get_unchecked_mut`` where the target may be
uninitialized, everywhere relevant in the ArrayVec implementation.
- 0.4.11
- In Rust 1.36 or later, use newly stable MaybeUninit. This extends the
soundness work introduced in 0.4.9, we are finally able to use this in
stable. We use feature detection (build script) to enable this at build
time.
- 0.4.10
- Use ``repr(C)`` in the ``union`` version that was introduced in 0.4.9, to
allay some soundness concerns.
- 0.4.9
- Use ``union`` in the implementation when this is detected to be supported
(nightly only for now). This is a better solution for treating uninitialized
regions correctly, and we'll use it in stable Rust as soon as we are able.
When this is enabled, the ``ArrayVec`` has no space overhead in its memory
layout, although the size of the vec should not be relied upon. (See `#114`_)
- ``ArrayString`` updated to not use uninitialized memory, it instead zeros its
backing array. This will be refined in the next version, since we
need to make changes to the user visible API.
- The ``use_union`` feature now does nothing (like its documentation foretold).
.. _`#114`: https://github.com/bluss/arrayvec/pull/114
- 0.4.8
- Implement Clone and Debug for ``IntoIter`` by @clarcharr
- Add more array sizes under crate features. These cover all sizes in the
  ranges 33 to 128 and 129 to 255 respectively (a few sizes in these ranges
  are included by default):
  - ``array-sizes-33-128``
  - ``array-sizes-129-255``
- 0.4.7
- Fix future compat warning about raw pointer casts
- Use ``drop_in_place`` when dropping the arrayvec by-value iterator
- Decrease minimum Rust version (see docs) by @jeehoonkang
- 0.3.25
- Fix future compat warning about raw pointer casts
- 0.4.6
- Fix compilation on 16-bit targets. This means, the 65536 array size is not
included on these targets.
- 0.3.24
- Fix compilation on 16-bit targets. This means, the 65536 array size is not
included on these targets.
- Fix license files so that they are both included (was fixed in 0.4 before)
- 0.4.5
- Add methods to ``ArrayString`` by @DenialAdams:
- ``.pop() -> Option<char>``
- ``.truncate(new_len)``
- ``.remove(index) -> char``
- Remove dependency on crate odds
- Document debug assertions in unsafe methods better
- 0.4.4
- Add method ``ArrayVec::truncate()`` by @niklasf
- 0.4.3
- Improve performance for ``ArrayVec::extend`` with a lower level
implementation (#74)
- Small cleanup in dependencies (use no std for crates where we don't need more)
- 0.4.2
- Add constructor method ``new`` to ``CapacityError``.
- 0.4.1
- Add ``Default`` impl to ``ArrayString`` by @tbu-
- 0.4.0
- Reformed signatures and error handling by @bluss and @tbu-:
- ``ArrayVec``'s ``push, insert, remove, swap_remove`` now match ``Vec``'s
corresponding signature and panic on capacity errors where applicable.
- Add fallible methods ``try_push, insert`` and checked methods
``pop_at, swap_pop``.
- Similar changes to ``ArrayString``'s push methods.
- Use a local version of the ``RangeArgument`` trait
- Add array sizes 50, 150, 200 by @daboross
- Support serde 1.0 by @daboross
- New method ``.push_unchecked()`` by @niklasf
- ``ArrayString`` implements ``PartialOrd, Ord`` by @tbu-
- Require Rust 1.14
- crate feature ``use_generic_array`` was dropped.
- 0.3.23
- Implement ``PartialOrd, Ord`` as well as ``PartialOrd<str>`` for
``ArrayString``.
- 0.3.22
- Implement ``Array`` for the 65536 size
- 0.3.21
- Use ``encode_utf8`` from crate odds
- Add constructor ``ArrayString::from_byte_string``
- 0.3.20
- Simplify and speed up ``ArrayString``'s ``.push(char)``.
- 0.3.19
- Add new crate feature ``use_generic_array`` which allows using their
``GenericArray`` just like a regular fixed size array for the storage
of an ``ArrayVec``.
- 0.3.18
- Fix bounds check in ``ArrayVec::insert``!
It would be buggy if ``self.len() < index < self.capacity()``. Take note of
the push out behavior specified in the docs.
- 0.3.17
- Added crate feature ``use_union`` which forwards to the nodrop crate feature
- Added methods ``.is_full()`` to ``ArrayVec`` and ``ArrayString``.
- 0.3.16
- Added method ``.retain()`` to ``ArrayVec``.
- Added methods ``.as_slice(), .as_mut_slice()`` to ``ArrayVec`` and ``.as_str()``
to ``ArrayString``.
- 0.3.15
- Add feature std, which you can opt out of to use ``no_std`` (requires Rust 1.6
to opt out).
- Implement ``Clone::clone_from`` for ArrayVec and ArrayString
- 0.3.14
- Add ``ArrayString::from(&str)``
- 0.3.13
- Added ``DerefMut`` impl for ``ArrayString``.
- Added method ``.simplify()`` to drop the element for ``CapacityError``.
- Added method ``.dispose()`` to ``ArrayVec``
- 0.3.12
- Added ArrayString, a fixed capacity analogy of String
- 0.3.11
- Added trait impls Default, PartialOrd, Ord, Write for ArrayVec
- 0.3.10
- Go back to using external NoDrop, fixing a panic safety bug (issue #3)
- 0.3.8
- Inline the non-dropping logic to remove one drop flag in the
ArrayVec representation.
- 0.3.7
- Added method .into_inner()
- Added unsafe method .set_len()
License
=======
Dual-licensed to be compatible with the Rust project.
Licensed under the Apache License, Version 2.0
http://www.apache.org/licenses/LICENSE-2.0 or the MIT license
http://opensource.org/licenses/MIT, at your
option. This file may not be copied, modified, or distributed
except according to those terms.

View File

@ -0,0 +1,90 @@
extern crate arrayvec;
#[macro_use] extern crate bencher;
use arrayvec::ArrayString;
use bencher::Bencher;
fn try_push_c(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
b.iter(|| {
v.clear();
while v.try_push('c').is_ok() {
}
v.len()
});
b.bytes = v.capacity() as u64;
}
fn try_push_alpha(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
b.iter(|| {
v.clear();
while v.try_push('α').is_ok() {
}
v.len()
});
b.bytes = v.capacity() as u64;
}
// Yes, pushing a string char-by-char is slow. Use .push_str.
fn try_push_string(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
let input = "abcαβγ“”";
b.iter(|| {
v.clear();
for ch in input.chars().cycle() {
if !v.try_push(ch).is_ok() {
break;
}
}
v.len()
});
b.bytes = v.capacity() as u64;
}
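// Illustrative counterpart (not part of the upstream bench suite and not
// registered in `benchmark_group!` below): pushing whole string slices with
// `try_push_str`, the fallible form of the `.push_str` recommended above.
#[allow(dead_code)]
fn try_push_str_whole(b: &mut Bencher) {
    let mut v = ArrayString::<[u8; 512]>::new();
    let input = "abcαβγ“”";
    b.iter(|| {
        v.clear();
        // Keep appending the whole slice until the remaining capacity is too small.
        while v.try_push_str(input).is_ok() {
        }
        v.len()
    });
    b.bytes = v.capacity() as u64;
}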
fn push_c(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
b.iter(|| {
v.clear();
while !v.is_full() {
v.push('c');
}
v.len()
});
b.bytes = v.capacity() as u64;
}
fn push_alpha(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
b.iter(|| {
v.clear();
while !v.is_full() {
v.push('α');
}
v.len()
});
b.bytes = v.capacity() as u64;
}
fn push_string(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
let input = "abcαβγ“”";
b.iter(|| {
v.clear();
for ch in input.chars().cycle() {
if !v.is_full() {
v.push(ch);
} else {
break;
}
}
v.len()
});
b.bytes = v.capacity() as u64;
}
benchmark_group!(benches, try_push_c, try_push_alpha, try_push_string, push_c,
push_alpha, push_string);
benchmark_main!(benches);

View File

@ -0,0 +1,43 @@
extern crate arrayvec;
#[macro_use] extern crate bencher;
use arrayvec::ArrayVec;
use bencher::Bencher;
fn extend_with_constant(b: &mut Bencher) {
let mut v = ArrayVec::<[u8; 512]>::new();
let cap = v.capacity();
b.iter(|| {
v.clear();
v.extend((0..cap).map(|_| 1));
v[0]
});
b.bytes = v.capacity() as u64;
}
fn extend_with_range(b: &mut Bencher) {
let mut v = ArrayVec::<[u8; 512]>::new();
let cap = v.capacity();
b.iter(|| {
v.clear();
v.extend((0..cap).map(|x| x as _));
v[0]
});
b.bytes = v.capacity() as u64;
}
fn extend_with_slice(b: &mut Bencher) {
let mut v = ArrayVec::<[u8; 512]>::new();
let data = [1; 512];
b.iter(|| {
v.clear();
v.extend(data.iter().cloned());
v[0]
});
b.bytes = v.capacity() as u64;
}
benchmark_group!(benches, extend_with_constant, extend_with_range, extend_with_slice);
benchmark_main!(benches);

View File

@ -0,0 +1,90 @@
use std::env;
use std::io::Write;
use std::process::{Command, Stdio};
fn main() {
// we need to output *some* file to opt out of the default
println!("cargo:rerun-if-changed=build.rs");
detect_maybe_uninit();
}
fn detect_maybe_uninit() {
let has_stable_maybe_uninit = probe(&stable_maybe_uninit());
if has_stable_maybe_uninit {
println!("cargo:rustc-cfg=has_stable_maybe_uninit");
return;
}
let has_unstable_union_with_md = probe(&maybe_uninit_code(true));
if has_unstable_union_with_md {
println!("cargo:rustc-cfg=has_manually_drop_in_union");
println!("cargo:rustc-cfg=has_union_feature");
}
}
// To guard against changes in this currently unstable feature, use
// a detection test instead of a Rustc version and/or date test.
fn stable_maybe_uninit() -> String {
let code = "
#![allow(warnings)]
use std::mem::MaybeUninit;
fn main() { }
";
code.to_string()
}
// To guard against changes in this currently unstable feature, use
// a detection test instead of a Rustc version and/or date test.
fn maybe_uninit_code(use_feature: bool) -> String {
let feature = if use_feature { "#![feature(untagged_unions)]" } else { "" };
let code = "
#![allow(warnings)]
use std::mem::ManuallyDrop;
#[derive(Copy)]
pub union MaybeUninit<T> {
empty: (),
value: ManuallyDrop<T>,
}
impl<T> Clone for MaybeUninit<T> where T: Copy
{
fn clone(&self) -> Self { *self }
}
fn main() {
let value1 = MaybeUninit::<[i32; 3]> { empty: () };
let value2 = MaybeUninit { value: ManuallyDrop::new([1, 2, 3]) };
}
";
[feature, code].concat()
}
/// Test if a code snippet can be compiled
fn probe(code: &str) -> bool {
let rustc = env::var_os("RUSTC").unwrap_or_else(|| "rustc".into());
let out_dir = env::var_os("OUT_DIR").expect("environment variable OUT_DIR");
let mut child = Command::new(rustc)
.arg("--out-dir")
.arg(out_dir)
.arg("--emit=obj")
.arg("-")
.stdin(Stdio::piped())
.spawn()
.expect("rustc probe");
child
.stdin
.as_mut()
.expect("rustc stdin")
.write_all(code.as_bytes())
.expect("write rustc stdin");
child.wait().expect("rustc probe").success()
}

View File

@ -0,0 +1,25 @@
.docblock pre.rust { background: #eeeeff; }
pre.trait, pre.fn, pre.struct, pre.enum, pre.typedef { background: #fcfefc; }
/* Small “example” label for doc examples */
.docblock pre.rust::before {
content: "example";
float: right;
font-style: italic;
font-size: 0.8em;
margin-top: -10px;
margin-right: -5px;
}
/* Fix up `where` clause display in trait listings */
pre.trait .where::before {
content: '\a ';
}
.docblock code {
background-color: inherit;
font-weight: bold;
padding: 0 0.1em;
}

View File

@ -0,0 +1,137 @@
/// Trait for fixed size arrays.
///
/// This trait is implemented for some specific array sizes, see
/// the implementor list below. At the current state of Rust we can't
/// make this fully general for every array size.
///
/// The following crate features add more array sizes (and they are not
/// enabled by default due to their impact on compilation speed).
///
/// - `array-sizes-33-128`: All sizes 33 to 128 are implemented
/// (a few in this range are included by default).
/// - `array-sizes-129-255`: All sizes 129 to 255 are implemented
/// (a few in this range are included by default).
pub unsafe trait Array {
/// The array's element type
type Item;
#[doc(hidden)]
/// The smallest index type that indexes the array.
type Index: Index;
#[doc(hidden)]
fn as_ptr(&self) -> *const Self::Item;
#[doc(hidden)]
fn as_mut_ptr(&mut self) -> *mut Self::Item;
#[doc(hidden)]
fn capacity() -> usize;
}
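// Illustrative note (not part of the upstream source): by default `Array` is
// only implemented for sizes 0 through 31 plus a handful of larger round
// sizes (see the `fix_array_impl_recursive!` invocations below). A size such
// as 57 is assumed to need the `array-sizes-33-128` crate feature, enabled
// from a dependent crate roughly like this:
//
//     [dependencies.arrayvec]
//     version = "0.4"
//     features = ["array-sizes-33-128"]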
pub trait Index : PartialEq + Copy {
fn to_usize(self) -> usize;
fn from(usize) -> Self;
}
use std::slice::{from_raw_parts};
pub trait ArrayExt : Array {
#[inline(always)]
fn as_slice(&self) -> &[Self::Item] {
unsafe {
from_raw_parts(self.as_ptr(), Self::capacity())
}
}
}
impl<A> ArrayExt for A where A: Array { }
impl Index for u8 {
#[inline(always)]
fn to_usize(self) -> usize { self as usize }
#[inline(always)]
fn from(ix: usize) -> Self { ix as u8 }
}
impl Index for u16 {
#[inline(always)]
fn to_usize(self) -> usize { self as usize }
#[inline(always)]
fn from(ix: usize) -> Self { ix as u16 }
}
impl Index for u32 {
#[inline(always)]
fn to_usize(self) -> usize { self as usize }
#[inline(always)]
fn from(ix: usize) -> Self { ix as u32 }
}
impl Index for usize {
#[inline(always)]
fn to_usize(self) -> usize { self }
#[inline(always)]
fn from(ix: usize) -> Self { ix }
}
macro_rules! fix_array_impl {
($index_type:ty, $len:expr ) => (
unsafe impl<T> Array for [T; $len] {
type Item = T;
type Index = $index_type;
#[doc(hidden)]
#[inline(always)]
fn as_ptr(&self) -> *const T { self as *const _ as *const _ }
#[doc(hidden)]
#[inline(always)]
fn as_mut_ptr(&mut self) -> *mut T { self as *mut _ as *mut _}
#[doc(hidden)]
#[inline(always)]
fn capacity() -> usize { $len }
}
)
}
macro_rules! fix_array_impl_recursive {
($index_type:ty, ) => ();
($index_type:ty, $($len:expr,)*) => (
$(fix_array_impl!($index_type, $len);)*
);
}
fix_array_impl_recursive!(u8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, );
#[cfg(not(feature="array-sizes-33-128"))]
fix_array_impl_recursive!(u8, 32, 40, 48, 50, 56, 64, 72, 96, 100, 128, );
#[cfg(feature="array-sizes-33-128")]
fix_array_impl_recursive!(u8,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
125, 126, 127, 128,
);
#[cfg(not(feature="array-sizes-129-255"))]
fix_array_impl_recursive!(u8, 160, 192, 200, 224,);
#[cfg(feature="array-sizes-129-255")]
fix_array_impl_recursive!(u8,
129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172,
173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204,
205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236,
237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
253, 254, 255,
);
fix_array_impl_recursive!(u16, 256, 384, 512, 768, 1024, 2048, 4096, 8192, 16384, 32768,);
// This array size doesn't exist on 16-bit
#[cfg(any(target_pointer_width="32", target_pointer_width="64"))]
fix_array_impl_recursive!(u32, 1 << 16,);

View File

@ -0,0 +1,516 @@
use std::borrow::Borrow;
use std::cmp;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::mem;
use std::ptr;
use std::ops::{Deref, DerefMut};
use std::str;
use std::str::Utf8Error;
use std::slice;
use array::{Array, ArrayExt};
use array::Index;
use CapacityError;
use char::encode_utf8;
#[cfg(feature="serde-1")]
use serde::{Serialize, Deserialize, Serializer, Deserializer};
/// A string with a fixed capacity.
///
/// The `ArrayString` is a string backed by a fixed size array. It keeps track
/// of its length.
///
/// The string is a contiguous value that you can store directly on the stack
/// if needed.
#[derive(Copy)]
pub struct ArrayString<A: Array<Item=u8>> {
// FIXME: Use Copyable union for xs when we can
xs: A,
len: A::Index,
}
impl<A: Array<Item=u8>> Default for ArrayString<A> {
/// Return an empty `ArrayString`
fn default() -> ArrayString<A> {
ArrayString::new()
}
}
impl<A: Array<Item=u8>> ArrayString<A> {
/// Create a new empty `ArrayString`.
///
/// Capacity is inferred from the type parameter.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 16]>::new();
/// string.push_str("foo");
/// assert_eq!(&string[..], "foo");
/// assert_eq!(string.capacity(), 16);
/// ```
pub fn new() -> ArrayString<A> {
unsafe {
ArrayString {
// FIXME: Use Copyable union for xs when we can
xs: mem::zeroed(),
len: Index::from(0),
}
}
}
/// Create a new `ArrayString` from a `str`.
///
/// Capacity is inferred from the type parameter.
///
/// **Errors** if the backing array is not large enough to fit the string.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 3]>::from("foo").unwrap();
/// assert_eq!(&string[..], "foo");
/// assert_eq!(string.len(), 3);
/// assert_eq!(string.capacity(), 3);
/// ```
pub fn from(s: &str) -> Result<Self, CapacityError<&str>> {
let mut arraystr = Self::new();
arraystr.try_push_str(s)?;
Ok(arraystr)
}
/// Create a new `ArrayString` from a byte string literal.
///
/// **Errors** if the byte string literal is not valid UTF-8.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let string = ArrayString::from_byte_string(b"hello world").unwrap();
/// ```
pub fn from_byte_string(b: &A) -> Result<Self, Utf8Error> {
let mut arraystr = Self::new();
let s = try!(str::from_utf8(b.as_slice()));
let _result = arraystr.try_push_str(s);
debug_assert!(_result.is_ok());
Ok(arraystr)
}
/// Return the capacity of the `ArrayString`.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let string = ArrayString::<[_; 3]>::new();
/// assert_eq!(string.capacity(), 3);
/// ```
#[inline]
pub fn capacity(&self) -> usize { A::capacity() }
/// Return if the `ArrayString` is completely filled.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 1]>::new();
/// assert!(!string.is_full());
/// string.push_str("A");
/// assert!(string.is_full());
/// ```
pub fn is_full(&self) -> bool { self.len() == self.capacity() }
/// Adds the given char to the end of the string.
///
/// ***Panics*** if the backing array is not large enough to fit the additional char.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 2]>::new();
///
/// string.push('a');
/// string.push('b');
///
/// assert_eq!(&string[..], "ab");
/// ```
pub fn push(&mut self, c: char) {
self.try_push(c).unwrap();
}
/// Adds the given char to the end of the string.
///
/// Returns `Ok` if the push succeeds.
///
/// **Errors** if the backing array is not large enough to fit the additional char.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 2]>::new();
///
/// string.try_push('a').unwrap();
/// string.try_push('b').unwrap();
/// let overflow = string.try_push('c');
///
/// assert_eq!(&string[..], "ab");
/// assert_eq!(overflow.unwrap_err().element(), 'c');
/// ```
pub fn try_push(&mut self, c: char) -> Result<(), CapacityError<char>> {
let len = self.len();
unsafe {
match encode_utf8(c, &mut self.raw_mut_bytes()[len..]) {
Ok(n) => {
self.set_len(len + n);
Ok(())
}
Err(_) => Err(CapacityError::new(c)),
}
}
}
/// Adds the given string slice to the end of the string.
///
/// ***Panics*** if the backing array is not large enough to fit the string.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 2]>::new();
///
/// string.push_str("a");
/// string.push_str("d");
///
/// assert_eq!(&string[..], "ad");
/// ```
pub fn push_str(&mut self, s: &str) {
self.try_push_str(s).unwrap()
}
/// Adds the given string slice to the end of the string.
///
/// Returns `Ok` if the push succeeds.
///
/// **Errors** if the backing array is not large enough to fit the string.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 2]>::new();
///
/// string.try_push_str("a").unwrap();
/// let overflow1 = string.try_push_str("bc");
/// string.try_push_str("d").unwrap();
/// let overflow2 = string.try_push_str("ef");
///
/// assert_eq!(&string[..], "ad");
/// assert_eq!(overflow1.unwrap_err().element(), "bc");
/// assert_eq!(overflow2.unwrap_err().element(), "ef");
/// ```
pub fn try_push_str<'a>(&mut self, s: &'a str) -> Result<(), CapacityError<&'a str>> {
if s.len() > self.capacity() - self.len() {
return Err(CapacityError::new(s));
}
unsafe {
let dst = self.xs.as_mut_ptr().offset(self.len() as isize);
let src = s.as_ptr();
ptr::copy_nonoverlapping(src, dst, s.len());
let newl = self.len() + s.len();
self.set_len(newl);
}
Ok(())
}
/// Removes the last character from the string and returns it.
///
/// Returns `None` if this `ArrayString` is empty.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut s = ArrayString::<[_; 3]>::from("foo").unwrap();
///
/// assert_eq!(s.pop(), Some('o'));
/// assert_eq!(s.pop(), Some('o'));
/// assert_eq!(s.pop(), Some('f'));
///
/// assert_eq!(s.pop(), None);
/// ```
#[inline]
pub fn pop(&mut self) -> Option<char> {
let ch = match self.chars().rev().next() {
Some(ch) => ch,
None => return None,
};
let new_len = self.len() - ch.len_utf8();
unsafe {
self.set_len(new_len);
}
Some(ch)
}
/// Shortens this `ArrayString` to the specified length.
///
/// If `new_len` is greater than the string's current length, this has no
/// effect.
///
/// ***Panics*** if `new_len` does not lie on a `char` boundary.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 6]>::from("foobar").unwrap();
/// string.truncate(3);
/// assert_eq!(&string[..], "foo");
/// string.truncate(4);
/// assert_eq!(&string[..], "foo");
/// ```
#[inline]
pub fn truncate(&mut self, new_len: usize) {
if new_len <= self.len() {
assert!(self.is_char_boundary(new_len));
unsafe {
// In libstd truncate is called on the underlying vector,
// which in turn drops each element.
// As we know we don't have to worry about Drop,
// we can just set the length (a la clear.)
self.set_len(new_len);
}
}
}
/// Removes a `char` from this `ArrayString` at a byte position and returns it.
///
/// This is an `O(n)` operation, as it requires copying every element in the
/// array.
///
/// ***Panics*** if `idx` is larger than or equal to the `ArrayString`'s length,
/// or if it does not lie on a `char` boundary.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut s = ArrayString::<[_; 3]>::from("foo").unwrap();
///
/// assert_eq!(s.remove(0), 'f');
/// assert_eq!(s.remove(1), 'o');
/// assert_eq!(s.remove(0), 'o');
/// ```
#[inline]
pub fn remove(&mut self, idx: usize) -> char {
let ch = match self[idx..].chars().next() {
Some(ch) => ch,
None => panic!("cannot remove a char from the end of a string"),
};
let next = idx + ch.len_utf8();
let len = self.len();
unsafe {
ptr::copy(self.xs.as_ptr().offset(next as isize),
self.xs.as_mut_ptr().offset(idx as isize),
len - next);
self.set_len(len - (next - idx));
}
ch
}
/// Make the string empty.
pub fn clear(&mut self) {
unsafe {
self.set_len(0);
}
}
/// Set the string's length.
///
/// This function is `unsafe` because it changes the notion of the
/// number of “valid” bytes in the string. Use with care.
///
/// This method uses *debug assertions* to check the validity of `length`
/// and may use other debug assertions.
#[inline]
pub unsafe fn set_len(&mut self, length: usize) {
debug_assert!(length <= self.capacity());
self.len = Index::from(length);
}
/// Return a string slice of the whole `ArrayString`.
pub fn as_str(&self) -> &str {
self
}
/// Return a mutable slice of the whole string's buffer
unsafe fn raw_mut_bytes(&mut self) -> &mut [u8] {
slice::from_raw_parts_mut(self.xs.as_mut_ptr(), self.capacity())
}
}
impl<A: Array<Item=u8>> Deref for ArrayString<A> {
type Target = str;
#[inline]
fn deref(&self) -> &str {
unsafe {
let sl = slice::from_raw_parts(self.xs.as_ptr(), self.len.to_usize());
str::from_utf8_unchecked(sl)
}
}
}
impl<A: Array<Item=u8>> DerefMut for ArrayString<A> {
#[inline]
fn deref_mut(&mut self) -> &mut str {
unsafe {
let sl = slice::from_raw_parts_mut(self.xs.as_mut_ptr(), self.len.to_usize());
// FIXME: Nothing but transmute to do this right now
mem::transmute(sl)
}
}
}
impl<A: Array<Item=u8>> PartialEq for ArrayString<A> {
fn eq(&self, rhs: &Self) -> bool {
**self == **rhs
}
}
impl<A: Array<Item=u8>> PartialEq<str> for ArrayString<A> {
fn eq(&self, rhs: &str) -> bool {
&**self == rhs
}
}
impl<A: Array<Item=u8>> PartialEq<ArrayString<A>> for str {
fn eq(&self, rhs: &ArrayString<A>) -> bool {
self == &**rhs
}
}
impl<A: Array<Item=u8>> Eq for ArrayString<A> { }
impl<A: Array<Item=u8>> Hash for ArrayString<A> {
fn hash<H: Hasher>(&self, h: &mut H) {
(**self).hash(h)
}
}
impl<A: Array<Item=u8>> Borrow<str> for ArrayString<A> {
fn borrow(&self) -> &str { self }
}
impl<A: Array<Item=u8>> AsRef<str> for ArrayString<A> {
fn as_ref(&self) -> &str { self }
}
impl<A: Array<Item=u8>> fmt::Debug for ArrayString<A> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { (**self).fmt(f) }
}
impl<A: Array<Item=u8>> fmt::Display for ArrayString<A> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { (**self).fmt(f) }
}
/// `Write` appends written data to the end of the string.
impl<A: Array<Item=u8>> fmt::Write for ArrayString<A> {
fn write_char(&mut self, c: char) -> fmt::Result {
self.try_push(c).map_err(|_| fmt::Error)
}
fn write_str(&mut self, s: &str) -> fmt::Result {
self.try_push_str(s).map_err(|_| fmt::Error)
}
}
impl<A: Array<Item=u8> + Copy> Clone for ArrayString<A> {
fn clone(&self) -> ArrayString<A> {
*self
}
fn clone_from(&mut self, rhs: &Self) {
// guaranteed to fit due to types matching.
self.clear();
self.try_push_str(rhs).ok();
}
}
impl<A: Array<Item=u8>> PartialOrd for ArrayString<A> {
fn partial_cmp(&self, rhs: &Self) -> Option<cmp::Ordering> {
(**self).partial_cmp(&**rhs)
}
fn lt(&self, rhs: &Self) -> bool { **self < **rhs }
fn le(&self, rhs: &Self) -> bool { **self <= **rhs }
fn gt(&self, rhs: &Self) -> bool { **self > **rhs }
fn ge(&self, rhs: &Self) -> bool { **self >= **rhs }
}
impl<A: Array<Item=u8>> PartialOrd<str> for ArrayString<A> {
fn partial_cmp(&self, rhs: &str) -> Option<cmp::Ordering> {
(**self).partial_cmp(rhs)
}
fn lt(&self, rhs: &str) -> bool { &**self < rhs }
fn le(&self, rhs: &str) -> bool { &**self <= rhs }
fn gt(&self, rhs: &str) -> bool { &**self > rhs }
fn ge(&self, rhs: &str) -> bool { &**self >= rhs }
}
impl<A: Array<Item=u8>> PartialOrd<ArrayString<A>> for str {
fn partial_cmp(&self, rhs: &ArrayString<A>) -> Option<cmp::Ordering> {
self.partial_cmp(&**rhs)
}
fn lt(&self, rhs: &ArrayString<A>) -> bool { self < &**rhs }
fn le(&self, rhs: &ArrayString<A>) -> bool { self <= &**rhs }
fn gt(&self, rhs: &ArrayString<A>) -> bool { self > &**rhs }
fn ge(&self, rhs: &ArrayString<A>) -> bool { self >= &**rhs }
}
impl<A: Array<Item=u8>> Ord for ArrayString<A> {
fn cmp(&self, rhs: &Self) -> cmp::Ordering {
(**self).cmp(&**rhs)
}
}
#[cfg(feature="serde-1")]
/// Requires crate feature `"serde-1"`
impl<A: Array<Item=u8>> Serialize for ArrayString<A> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer
{
serializer.serialize_str(&*self)
}
}
#[cfg(feature="serde-1")]
/// Requires crate feature `"serde-1"`
impl<'de, A: Array<Item=u8>> Deserialize<'de> for ArrayString<A> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>
{
use serde::de::{self, Visitor};
use std::marker::PhantomData;
struct ArrayStringVisitor<A: Array<Item=u8>>(PhantomData<A>);
impl<'de, A: Array<Item=u8>> Visitor<'de> for ArrayStringVisitor<A> {
type Value = ArrayString<A>;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
write!(formatter, "a string no more than {} bytes long", A::capacity())
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where E: de::Error,
{
ArrayString::from(v).map_err(|_| E::invalid_length(v.len(), &self))
}
fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
where E: de::Error,
{
let s = try!(str::from_utf8(v).map_err(|_| E::invalid_value(de::Unexpected::Bytes(v), &self)));
ArrayString::from(s).map_err(|_| E::invalid_length(s.len(), &self))
}
}
deserializer.deserialize_str(ArrayStringVisitor::<A>(PhantomData))
}
}

View File

@ -0,0 +1,54 @@
// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// Original authors: alexcrichton, bluss
// UTF-8 ranges and tags for encoding characters
const TAG_CONT: u8 = 0b1000_0000;
const TAG_TWO_B: u8 = 0b1100_0000;
const TAG_THREE_B: u8 = 0b1110_0000;
const TAG_FOUR_B: u8 = 0b1111_0000;
const MAX_ONE_B: u32 = 0x80;
const MAX_TWO_B: u32 = 0x800;
const MAX_THREE_B: u32 = 0x10000;
/// Error returned by `encode_utf8` when the buffer is too small for the encoded char.
pub struct EncodeUtf8Error;
/// Encode a char into buf using UTF-8.
///
/// On success, return the byte length of the encoding (1, 2, 3 or 4).<br>
/// On error, return `EncodeUtf8Error` if the buffer was too short for the char.
#[inline]
pub fn encode_utf8(ch: char, buf: &mut [u8]) -> Result<usize, EncodeUtf8Error>
{
let code = ch as u32;
if code < MAX_ONE_B && buf.len() >= 1 {
buf[0] = code as u8;
return Ok(1);
} else if code < MAX_TWO_B && buf.len() >= 2 {
buf[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
buf[1] = (code & 0x3F) as u8 | TAG_CONT;
return Ok(2);
} else if code < MAX_THREE_B && buf.len() >= 3 {
buf[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
buf[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
buf[2] = (code & 0x3F) as u8 | TAG_CONT;
return Ok(3);
} else if buf.len() >= 4 {
buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
return Ok(4);
};
Err(EncodeUtf8Error)
}
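// Illustrative sketch (not part of the upstream source): the returned length
// matches the UTF-8 width of the char, and a too-short buffer yields an error.
#[cfg(test)]
mod encode_utf8_sketch {
    use super::encode_utf8;

    #[test]
    fn lengths_and_short_buffer() {
        let mut buf = [0u8; 4];
        assert_eq!(encode_utf8('a', &mut buf).ok(), Some(1));
        assert_eq!(encode_utf8('α', &mut buf).ok(), Some(2));
        assert_eq!(encode_utf8('€', &mut buf).ok(), Some(3));
        assert_eq!(encode_utf8('😀', &mut buf).ok(), Some(4));
        // An empty destination cannot hold any encoding.
        assert!(encode_utf8('a', &mut buf[..0]).is_err());
    }
}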

View File

@ -0,0 +1,53 @@
use std::fmt;
#[cfg(feature="std")]
use std::any::Any;
#[cfg(feature="std")]
use std::error::Error;
/// Error value indicating insufficient capacity
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)]
pub struct CapacityError<T = ()> {
element: T,
}
impl<T> CapacityError<T> {
/// Create a new `CapacityError` from `element`.
pub fn new(element: T) -> CapacityError<T> {
CapacityError {
element: element,
}
}
/// Extract the overflowing element
pub fn element(self) -> T {
self.element
}
/// Convert into a `CapacityError` that does not carry an element.
pub fn simplify(self) -> CapacityError {
CapacityError { element: () }
}
}
const CAPERROR: &'static str = "insufficient capacity";
#[cfg(feature="std")]
/// Requires `features="std"`.
impl<T: Any> Error for CapacityError<T> {
fn description(&self) -> &str {
CAPERROR
}
}
impl<T> fmt::Display for CapacityError<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", CAPERROR)
}
}
impl<T> fmt::Debug for CapacityError<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}: {}", "CapacityError", CAPERROR)
}
}
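// Illustrative sketch (not part of the upstream source): the error hands back
// the element that did not fit, and `simplify` discards that payload.
#[cfg(test)]
mod capacity_error_sketch {
    use super::CapacityError;

    #[test]
    fn element_and_simplify() {
        let err = CapacityError::new(42);
        assert_eq!(err.element(), 42);
        let _unit: CapacityError = CapacityError::new("overflow").simplify();
    }
}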

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,46 @@
use array::Array;
use std::mem::ManuallyDrop;
/// A combination of ManuallyDrop and “maybe uninitialized”;
/// this wraps a value that can be wholly or partially uninitialized;
/// it also has no drop glue, regardless of the type of T.
#[repr(C)] // for cast from self ptr to value
pub union MaybeUninit<T> {
empty: (),
value: ManuallyDrop<T>,
}
// Why we don't use std's MaybeUninit on nightly: see the ptr method
impl<T> MaybeUninit<T> {
/// Create a new MaybeUninit with uninitialized interior
pub unsafe fn uninitialized() -> Self {
MaybeUninit { empty: () }
}
/// Create a new MaybeUninit from the value `v`.
pub fn from(v: T) -> Self {
MaybeUninit { value: ManuallyDrop::new(v) }
}
// Raw pointer casts written so that we don't reference or access the
// uninitialized interior value
/// Return a raw pointer to the start of the interior array
pub fn ptr(&self) -> *const T::Item
where T: Array
{
// std MaybeUninit creates a &self.value reference here which is
// not guaranteed to be sound in our case - we will partially
// initialize the value, not always wholly.
self as *const _ as *const T::Item
}
/// Return a mut raw pointer to the start of the interior array
pub fn ptr_mut(&mut self) -> *mut T::Item
where T: Array
{
self as *mut _ as *mut T::Item
}
}

View File

@ -0,0 +1,41 @@
use array::Array;
use nodrop::NoDrop;
use std::mem::uninitialized;
/// A combination of NoDrop and “maybe uninitialized”;
/// this wraps a value that can be wholly or partially uninitialized.
///
/// NOTE: This is known to not be a good solution, but it's the one we have kept
/// working on stable Rust. Stable improvements are encouraged, in any form,
/// but of course we are waiting for a real, stable, MaybeUninit.
pub struct MaybeUninit<T>(NoDrop<T>);
// Why we don't use ManuallyDrop here: it doesn't inhibit
// enum layout optimizations that depend on T, and we support older Rust.
impl<T> MaybeUninit<T> {
/// Create a new MaybeUninit with uninitialized interior
pub unsafe fn uninitialized() -> Self {
Self::from(uninitialized())
}
/// Create a new MaybeUninit from the value `v`.
pub fn from(v: T) -> Self {
MaybeUninit(NoDrop::new(v))
}
/// Return a raw pointer to the start of the interior array
pub fn ptr(&self) -> *const T::Item
where T: Array
{
&*self.0 as *const T as *const _
}
/// Return a mut raw pointer to the start of the interior array
pub fn ptr_mut(&mut self) -> *mut T::Item
where T: Array
{
&mut *self.0 as *mut T as *mut _
}
}

View File

@ -0,0 +1,40 @@
use array::Array;
use std::mem::MaybeUninit as StdMaybeUninit;
pub struct MaybeUninit<T> {
inner: StdMaybeUninit<T>,
}
impl<T> MaybeUninit<T> {
/// Create a new MaybeUninit with uninitialized interior
pub unsafe fn uninitialized() -> Self {
MaybeUninit { inner: StdMaybeUninit::uninit() }
}
/// Create a new MaybeUninit from the value `v`.
pub fn from(v: T) -> Self {
MaybeUninit { inner: StdMaybeUninit::new(v) }
}
// Raw pointer casts written so that we don't reference or access the
// uninitialized interior value
/// Return a raw pointer to the start of the interior array
pub fn ptr(&self) -> *const T::Item
where T: Array
{
// std MaybeUninit creates a &self.value reference here which is
// not guaranteed to be sound in our case - we will partially
// initialize the value, not always wholly.
self.inner.as_ptr() as *const T::Item
}
/// Return a mut raw pointer to the start of the interior array
pub fn ptr_mut(&mut self) -> *mut T::Item
where T: Array
{
self.inner.as_mut_ptr() as *mut T::Item
}
}
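// Illustrative sketch (not part of the upstream source, and an assumption
// about intended use): only a prefix of the interior array is initialized
// through the raw pointer, then read back.
#[cfg(test)]
mod maybe_uninit_sketch {
    use super::MaybeUninit;

    #[test]
    fn partial_init_via_raw_ptr() {
        unsafe {
            let mut m = MaybeUninit::<[u32; 4]>::uninitialized();
            // Initialize only element 0; the rest stays uninitialized.
            m.ptr_mut().write(7);
            assert_eq!(*m.ptr(), 7);
        }
    }
}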

View File

@ -0,0 +1,42 @@
use std::ops::{
RangeFull,
RangeFrom,
RangeTo,
Range,
};
/// `RangeArgument` is implemented by Rust's built-in range types, produced
/// by range syntax like `..`, `a..`, `..b` or `c..d`.
///
/// Note: This is arrayvec's provisional trait, waiting for stable Rust to
/// provide an equivalent.
pub trait RangeArgument {
#[inline]
/// Start index (inclusive)
fn start(&self) -> Option<usize> { None }
#[inline]
/// End index (exclusive)
fn end(&self) -> Option<usize> { None }
}
impl RangeArgument for RangeFull {}
impl RangeArgument for RangeFrom<usize> {
#[inline]
fn start(&self) -> Option<usize> { Some(self.start) }
}
impl RangeArgument for RangeTo<usize> {
#[inline]
fn end(&self) -> Option<usize> { Some(self.end) }
}
impl RangeArgument for Range<usize> {
#[inline]
fn start(&self) -> Option<usize> { Some(self.start) }
#[inline]
fn end(&self) -> Option<usize> { Some(self.end) }
}
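// Illustrative sketch (not part of the upstream source): how the provisional
// trait reports bounds for the built-in range types.
#[cfg(test)]
mod range_argument_sketch {
    use super::RangeArgument;

    #[test]
    fn reported_bounds() {
        assert_eq!(RangeArgument::start(&(2usize..5)), Some(2));
        assert_eq!(RangeArgument::end(&(2usize..5)), Some(5));
        assert_eq!(RangeArgument::end(&(..7usize)), Some(7));
        assert_eq!(RangeArgument::start(&(3usize..)), Some(3));
        // `..` leaves both ends open.
        assert_eq!(RangeArgument::start(&(..)), None);
        assert_eq!(RangeArgument::end(&(..)), None);
    }
}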

View File

@ -0,0 +1,79 @@
#![cfg(feature = "serde-1")]
extern crate arrayvec;
extern crate serde_test;
mod array_vec {
use arrayvec::ArrayVec;
use serde_test::{Token, assert_tokens, assert_de_tokens_error};
#[test]
fn test_ser_de_empty() {
let vec = ArrayVec::<[u32; 0]>::new();
assert_tokens(&vec, &[
Token::Seq { len: Some(0) },
Token::SeqEnd,
]);
}
#[test]
fn test_ser_de() {
let mut vec = ArrayVec::<[u32; 3]>::new();
vec.push(20);
vec.push(55);
vec.push(123);
assert_tokens(&vec, &[
Token::Seq { len: Some(3) },
Token::U32(20),
Token::U32(55),
Token::U32(123),
Token::SeqEnd,
]);
}
#[test]
fn test_de_too_large() {
assert_de_tokens_error::<ArrayVec<[u32; 2]>>(&[
Token::Seq { len: Some(3) },
Token::U32(13),
Token::U32(42),
Token::U32(68),
], "invalid length 3, expected an array with no more than 2 items");
}
}
mod array_string {
use arrayvec::ArrayString;
use serde_test::{Token, assert_tokens, assert_de_tokens_error};
#[test]
fn test_ser_de_empty() {
let string = ArrayString::<[u8; 0]>::new();
assert_tokens(&string, &[
Token::Str(""),
]);
}
#[test]
fn test_ser_de() {
let string = ArrayString::<[u8; 9]>::from("1234 abcd")
.expect("expected exact specified capacity to be enough");
assert_tokens(&string, &[
Token::Str("1234 abcd"),
]);
}
#[test]
fn test_de_too_large() {
assert_de_tokens_error::<ArrayString<[u8; 2]>>(&[
Token::Str("afd")
], "invalid length 3, expected a string no more than 2 bytes long");
}
}

View File

@ -0,0 +1,517 @@
extern crate arrayvec;
#[macro_use] extern crate matches;
use arrayvec::ArrayVec;
use arrayvec::ArrayString;
use std::mem;
use arrayvec::CapacityError;
use std::collections::HashMap;
#[test]
fn test_simple() {
use std::ops::Add;
let mut vec: ArrayVec<[Vec<i32>; 3]> = ArrayVec::new();
vec.push(vec![1, 2, 3, 4]);
vec.push(vec![10]);
vec.push(vec![-1, 13, -2]);
for elt in &vec {
assert_eq!(elt.iter().fold(0, Add::add), 10);
}
let sum_len = vec.into_iter().map(|x| x.len()).fold(0, Add::add);
assert_eq!(sum_len, 8);
}
#[test]
fn test_u16_index() {
const N: usize = 4096;
let mut vec: ArrayVec<[_; N]> = ArrayVec::new();
for _ in 0..N {
assert!(vec.try_push(1u8).is_ok());
}
assert!(vec.try_push(0).is_err());
assert_eq!(vec.len(), N);
}
#[test]
fn test_iter() {
let mut iter = ArrayVec::from([1, 2, 3]).into_iter();
assert_eq!(iter.size_hint(), (3, Some(3)));
assert_eq!(iter.next_back(), Some(3));
assert_eq!(iter.next(), Some(1));
assert_eq!(iter.next_back(), Some(2));
assert_eq!(iter.size_hint(), (0, Some(0)));
assert_eq!(iter.next_back(), None);
}
#[test]
fn test_drop() {
use std::cell::Cell;
let flag = &Cell::new(0);
#[derive(Clone)]
struct Bump<'a>(&'a Cell<i32>);
impl<'a> Drop for Bump<'a> {
fn drop(&mut self) {
let n = self.0.get();
self.0.set(n + 1);
}
}
{
let mut array = ArrayVec::<[Bump; 128]>::new();
array.push(Bump(flag));
array.push(Bump(flag));
}
assert_eq!(flag.get(), 2);
// test something with the nullable pointer optimization
flag.set(0);
{
let mut array = ArrayVec::<[_; 3]>::new();
array.push(vec![Bump(flag)]);
array.push(vec![Bump(flag), Bump(flag)]);
array.push(vec![]);
let push4 = array.try_push(vec![Bump(flag)]);
assert_eq!(flag.get(), 0);
drop(push4);
assert_eq!(flag.get(), 1);
drop(array.pop());
assert_eq!(flag.get(), 1);
drop(array.pop());
assert_eq!(flag.get(), 3);
}
assert_eq!(flag.get(), 4);
// test into_inner
flag.set(0);
{
let mut array = ArrayVec::<[_; 3]>::new();
array.push(Bump(flag));
array.push(Bump(flag));
array.push(Bump(flag));
let inner = array.into_inner();
assert!(inner.is_ok());
assert_eq!(flag.get(), 0);
drop(inner);
assert_eq!(flag.get(), 3);
}
// test cloning into_iter
flag.set(0);
{
let mut array = ArrayVec::<[_; 3]>::new();
array.push(Bump(flag));
array.push(Bump(flag));
array.push(Bump(flag));
let mut iter = array.into_iter();
assert_eq!(flag.get(), 0);
iter.next();
assert_eq!(flag.get(), 1);
let clone = iter.clone();
assert_eq!(flag.get(), 1);
drop(clone);
assert_eq!(flag.get(), 3);
drop(iter);
assert_eq!(flag.get(), 5);
}
}
#[test]
fn test_extend() {
let mut range = 0..10;
let mut array: ArrayVec<[_; 5]> = range.by_ref().collect();
assert_eq!(&array[..], &[0, 1, 2, 3, 4]);
assert_eq!(range.next(), Some(5));
array.extend(range.by_ref());
assert_eq!(range.next(), Some(6));
let mut array: ArrayVec<[_; 10]> = (0..3).collect();
assert_eq!(&array[..], &[0, 1, 2]);
array.extend(3..5);
assert_eq!(&array[..], &[0, 1, 2, 3, 4]);
}
#[test]
fn test_is_send_sync() {
let data = ArrayVec::<[Vec<i32>; 5]>::new();
&data as &Send;
&data as &Sync;
}
#[test]
fn test_compact_size() {
// Future rust will kill these drop flags!
// 4 elements size + 1 len + 1 enum tag + [1 drop flag]
type ByteArray = ArrayVec<[u8; 4]>;
println!("{}", mem::size_of::<ByteArray>());
assert!(mem::size_of::<ByteArray>() <= 8);
// 12 element size + 1 enum tag + 3 padding + 1 len + 1 drop flag + 2 padding
type QuadArray = ArrayVec<[u32; 3]>;
println!("{}", mem::size_of::<QuadArray>());
assert!(mem::size_of::<QuadArray>() <= 24);
}
#[test]
fn test_still_works_with_option_arrayvec() {
type RefArray = ArrayVec<[&'static i32; 2]>;
let array = Some(RefArray::new());
assert!(array.is_some());
println!("{:?}", array);
}
#[test]
fn test_drain() {
let mut v = ArrayVec::from([0; 8]);
v.pop();
v.drain(0..7);
assert_eq!(&v[..], &[]);
v.extend(0..);
v.drain(1..4);
assert_eq!(&v[..], &[0, 4, 5, 6, 7]);
let u: ArrayVec<[_; 3]> = v.drain(1..4).rev().collect();
assert_eq!(&u[..], &[6, 5, 4]);
assert_eq!(&v[..], &[0, 7]);
v.drain(..);
assert_eq!(&v[..], &[]);
}
#[test]
fn test_retain() {
let mut v = ArrayVec::from([0; 8]);
for (i, elt) in v.iter_mut().enumerate() {
*elt = i;
}
v.retain(|_| true);
assert_eq!(&v[..], &[0, 1, 2, 3, 4, 5, 6, 7]);
v.retain(|elt| {
*elt /= 2;
*elt % 2 == 0
});
assert_eq!(&v[..], &[0, 0, 2, 2]);
v.retain(|_| false);
assert_eq!(&v[..], &[]);
}
#[test]
#[should_panic]
fn test_drain_oob() {
let mut v = ArrayVec::from([0; 8]);
v.pop();
v.drain(0..8);
}
#[test]
#[should_panic]
fn test_drop_panic() {
struct DropPanic;
impl Drop for DropPanic {
fn drop(&mut self) {
panic!("drop");
}
}
let mut array = ArrayVec::<[DropPanic; 1]>::new();
array.push(DropPanic);
}
#[test]
#[should_panic]
fn test_drop_panic_into_iter() {
struct DropPanic;
impl Drop for DropPanic {
fn drop(&mut self) {
panic!("drop");
}
}
let mut array = ArrayVec::<[DropPanic; 1]>::new();
array.push(DropPanic);
array.into_iter();
}
#[test]
fn test_insert() {
let mut v = ArrayVec::from([]);
assert_matches!(v.try_push(1), Err(_));
let mut v = ArrayVec::<[_; 3]>::new();
v.insert(0, 0);
v.insert(1, 1);
//let ret1 = v.try_insert(3, 3);
//assert_matches!(ret1, Err(InsertError::OutOfBounds(_)));
assert_eq!(&v[..], &[0, 1]);
v.insert(2, 2);
assert_eq!(&v[..], &[0, 1, 2]);
let ret2 = v.try_insert(1, 9);
assert_eq!(&v[..], &[0, 1, 2]);
assert_matches!(ret2, Err(_));
let mut v = ArrayVec::from([2]);
assert_matches!(v.try_insert(0, 1), Err(CapacityError { .. }));
assert_matches!(v.try_insert(1, 1), Err(CapacityError { .. }));
//assert_matches!(v.try_insert(2, 1), Err(CapacityError { .. }));
}
#[test]
fn test_into_inner_1() {
let mut v = ArrayVec::from([1, 2]);
v.pop();
let u = v.clone();
assert_eq!(v.into_inner(), Err(u));
}
#[test]
fn test_into_inner_2() {
let mut v = ArrayVec::<[String; 4]>::new();
v.push("a".into());
v.push("b".into());
v.push("c".into());
v.push("d".into());
assert_eq!(v.into_inner().unwrap(), ["a", "b", "c", "d"]);
}
#[test]
fn test_into_inner_3_() {
let mut v = ArrayVec::<[i32; 4]>::new();
v.extend(1..);
assert_eq!(v.into_inner().unwrap(), [1, 2, 3, 4]);
}
#[test]
fn test_write() {
use std::io::Write;
let mut v = ArrayVec::<[_; 8]>::new();
write!(&mut v, "\x01\x02\x03").unwrap();
assert_eq!(&v[..], &[1, 2, 3]);
let r = v.write(&[9; 16]).unwrap();
assert_eq!(r, 5);
assert_eq!(&v[..], &[1, 2, 3, 9, 9, 9, 9, 9]);
}
#[test]
fn array_clone_from() {
let mut v = ArrayVec::<[_; 4]>::new();
v.push(vec![1, 2]);
v.push(vec![3, 4, 5]);
v.push(vec![6]);
let reference = v.to_vec();
let mut u = ArrayVec::<[_; 4]>::new();
u.clone_from(&v);
assert_eq!(&u, &reference[..]);
let mut t = ArrayVec::<[_; 4]>::new();
t.push(vec![97]);
t.push(vec![]);
t.push(vec![5, 6, 2]);
t.push(vec![2]);
t.clone_from(&v);
assert_eq!(&t, &reference[..]);
t.clear();
t.clone_from(&v);
assert_eq!(&t, &reference[..]);
}
#[test]
fn test_string() {
use std::error::Error;
let text = "hello world";
let mut s = ArrayString::<[_; 16]>::new();
s.try_push_str(text).unwrap();
assert_eq!(&s, text);
assert_eq!(text, &s);
// Make sure Hash / Eq / Borrow match up so we can use HashMap
let mut map = HashMap::new();
map.insert(s, 1);
assert_eq!(map[text], 1);
let mut t = ArrayString::<[_; 2]>::new();
assert!(t.try_push_str(text).is_err());
assert_eq!(&t, "");
t.push_str("ab");
// DerefMut
let tmut: &mut str = &mut t;
assert_eq!(tmut, "ab");
// Test Error trait / try
let t = || -> Result<(), Box<dyn Error>> {
let mut t = ArrayString::<[_; 2]>::new();
t.try_push_str(text)?;
Ok(())
}();
assert!(t.is_err());
}
#[test]
fn test_string_from() {
let text = "hello world";
// Test `from` constructor
let u = ArrayString::<[_; 11]>::from(text).unwrap();
assert_eq!(&u, text);
assert_eq!(u.len(), text.len());
}
#[test]
fn test_string_from_bytes() {
let text = "hello world";
let u = ArrayString::from_byte_string(b"hello world").unwrap();
assert_eq!(&u, text);
assert_eq!(u.len(), text.len());
}
#[test]
fn test_string_clone() {
let text = "hi";
let mut s = ArrayString::<[_; 4]>::new();
s.push_str("abcd");
let t = ArrayString::<[_; 4]>::from(text).unwrap();
s.clone_from(&t);
assert_eq!(&t, &s);
}
#[test]
fn test_string_push() {
let text = "abcαβγ";
let mut s = ArrayString::<[_; 8]>::new();
for c in text.chars() {
if let Err(_) = s.try_push(c) {
break;
}
}
assert_eq!("abcαβ", &s[..]);
s.push('x');
assert_eq!("abcαβx", &s[..]);
assert!(s.try_push('x').is_err());
}
#[test]
fn test_insert_at_length() {
let mut v = ArrayVec::<[_; 8]>::new();
let result1 = v.try_insert(0, "a");
let result2 = v.try_insert(1, "b");
assert!(result1.is_ok() && result2.is_ok());
assert_eq!(&v[..], &["a", "b"]);
}
#[should_panic]
#[test]
fn test_insert_out_of_bounds() {
let mut v = ArrayVec::<[_; 8]>::new();
let _ = v.try_insert(1, "test");
}
/*
* insert that pushes out the last
let mut u = ArrayVec::from([1, 2, 3, 4]);
let ret = u.try_insert(3, 99);
assert_eq!(&u[..], &[1, 2, 3, 99]);
assert_matches!(ret, Err(_));
let ret = u.try_insert(4, 77);
assert_eq!(&u[..], &[1, 2, 3, 99]);
assert_matches!(ret, Err(_));
*/
#[test]
fn test_drop_in_insert() {
use std::cell::Cell;
let flag = &Cell::new(0);
struct Bump<'a>(&'a Cell<i32>);
impl<'a> Drop for Bump<'a> {
fn drop(&mut self) {
let n = self.0.get();
self.0.set(n + 1);
}
}
flag.set(0);
{
let mut array = ArrayVec::<[_; 2]>::new();
array.push(Bump(flag));
array.insert(0, Bump(flag));
assert_eq!(flag.get(), 0);
let ret = array.try_insert(1, Bump(flag));
assert_eq!(flag.get(), 0);
assert_matches!(ret, Err(_));
drop(ret);
assert_eq!(flag.get(), 1);
}
assert_eq!(flag.get(), 3);
}
#[test]
fn test_pop_at() {
let mut v = ArrayVec::<[String; 4]>::new();
let s = String::from;
v.push(s("a"));
v.push(s("b"));
v.push(s("c"));
v.push(s("d"));
assert_eq!(v.pop_at(4), None);
assert_eq!(v.pop_at(1), Some(s("b")));
assert_eq!(v.pop_at(1), Some(s("c")));
assert_eq!(v.pop_at(2), None);
assert_eq!(&v[..], &["a", "d"]);
}
#[test]
fn test_sizes() {
let v = ArrayVec::from([0u8; 1 << 16]);
assert_eq!(vec![0u8; v.len()], &v[..]);
}
#[test]
fn test_default() {
use std::net;
let s: ArrayString<[u8; 4]> = Default::default();
// Something without `Default` implementation.
let v: ArrayVec<[net::TcpStream; 4]> = Default::default();
assert_eq!(s.len(), 0);
assert_eq!(v.len(), 0);
}
#[cfg(feature="array-sizes-33-128")]
#[test]
fn test_sizes_33_128() {
ArrayVec::from([0u8; 52]);
ArrayVec::from([0u8; 127]);
}
#[cfg(feature="array-sizes-129-255")]
#[test]
fn test_sizes_129_255() {
ArrayVec::from([0u8; 237]);
ArrayVec::from([0u8; 255]);
}
#[test]
fn test_newish_stable_uses_maybe_uninit() {
if option_env!("ARRAYVECTEST_ENSURE_MAYBEUNINIT").map(|s| !s.is_empty()).unwrap_or(false) {
assert!(cfg!(has_stable_maybe_uninit));
}
}

View File

@ -0,0 +1 @@
{"files":{"Cargo.toml":"e7405a91fea075bb4fedb0e76e2039af27d6c380beaa31150f37655d79a7a3ab","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0245ee104228a100ce5fceecf43e25faae450494d9173f43fd94c27d69fdac13","README.rst":"8fab86c3c759d153a1a8a48e5f7f48546c898f0ec91433001c57fe0002af6455","benches/arraystring.rs":"f12b890977117ebde4ca42bcd6b91f2a6a087f2b235aaca6d15e30d125ae9f67","benches/extend.rs":"c3d69cc488ec5341b019cfed545ebbfea252f98718037b413f6a349da9489d1b","custom.css":"e6f2cd299392337b4e2959c52f422e5b7be11920ea98d10db44d10ddef5ed47c","src/array.rs":"8a42b3ff7a5a0713e8ee22462f303b0ce15bdc49a9fd5eb64f58e56855bdf944","src/array_string.rs":"fdcc24f0fd07e781b378f5d0190279e6d9c89b422f67e546ae443c602f967896","src/char.rs":"40af597d93895f206abcd33953b5d3d5a512d3b16ff5f96e492e659d9cca4209","src/errors.rs":"dde99bffaddfd45396aab7e07642cc018ef5435fe60c4f26a2c05a36555be18c","src/lib.rs":"4c00e50b532aec68b52fde4a737b7b5980b0cfb28f5c09ab8408d04896895a87","src/maybe_uninit.rs":"00659a86e8f84852d4355077a16beceaad0440ac0e81851fbac712fdb1850622","tests/serde.rs":"18c165cf6024f04a25b19aa139657d7c59f72d1541c9b24b44f9eaea01f507db","tests/tests.rs":"9633b92fe6c650b9b816cecac23b9c9e6a0365b1f67d4f0bfaad9e645e2bdc49"},"package":"cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"}

View File

@ -0,0 +1,46 @@
"""
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # "MIT,Apache-2.0"
])
load(
"@io_bazel_rules_rust//rust:rust.bzl",
"rust_library",
"rust_binary",
"rust_test",
)
# Unsupported target "arraystring" with type "bench" omitted
rust_library(
name = "arrayvec",
crate_root = "src/lib.rs",
crate_type = "lib",
edition = "2018",
srcs = glob(["**/*.rs"]),
deps = [
],
rustc_flags = [
"--cap-lints=allow",
],
version = "0.5.1",
crate_features = [
],
)
# Unsupported target "extend" with type "bench" omitted
# Unsupported target "serde" with type "test" omitted
# Unsupported target "tests" with type "test" omitted

View File

@ -0,0 +1,62 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "arrayvec"
version = "0.5.1"
authors = ["bluss"]
description = "A vector with fixed capacity, backed by an array (it can be stored on the stack too). Implements fixed capacity ArrayVec and ArrayString."
documentation = "https://docs.rs/arrayvec/"
keywords = ["stack", "vector", "array", "data-structure", "no_std"]
categories = ["data-structures", "no-std"]
license = "MIT/Apache-2.0"
repository = "https://github.com/bluss/arrayvec"
[package.metadata.docs.rs]
features = ["serde"]
[package.metadata.release]
no-dev-version = true
tag-name = "{{version}}"
[profile.bench]
debug = true
[profile.release]
debug = true
[[bench]]
name = "extend"
harness = false
[[bench]]
name = "arraystring"
harness = false
[dependencies.serde]
version = "1.0"
optional = true
default-features = false
[dev-dependencies.bencher]
version = "0.1.4"
[dev-dependencies.matches]
version = "0.1"
[dev-dependencies.serde_test]
version = "1.0"
[build-dependencies]
[features]
array-sizes-129-255 = []
array-sizes-33-128 = []
default = ["std"]
std = []

View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,25 @@
Copyright (c) Ulrik Sverdrup "bluss" 2015-2017
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

View File

@ -0,0 +1,259 @@
arrayvec
========
A vector with fixed capacity.
Please read the `API documentation here`__
__ https://docs.rs/arrayvec
|build_status|_ |crates|_ |crates2|_
.. |build_status| image:: https://travis-ci.org/bluss/arrayvec.svg
.. _build_status: https://travis-ci.org/bluss/arrayvec
.. |crates| image:: http://meritbadge.herokuapp.com/arrayvec
.. _crates: https://crates.io/crates/arrayvec
.. |crates2| image:: http://meritbadge.herokuapp.com/nodrop
.. _crates2: https://crates.io/crates/nodrop
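A quick taste (an editor's sketch rather than official documentation; it uses
only calls exercised by the crate's own tests and doc examples)::

    use arrayvec::ArrayVec;

    let mut array = ArrayVec::<[_; 2]>::new();
    array.push(1);
    array.push(2);
    assert!(array.try_push(3).is_err()); // the capacity of 2 is fixed at compile time
    assert_eq!(&array[..], &[1, 2]);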
Recent Changes (arrayvec)
-------------------------
- 0.5.1
- Add ``as_ptr``, ``as_mut_ptr`` accessors directly on the ``ArrayVec`` by @tbu-
(matches the same addition to ``Vec`` which happened in Rust 1.37).
- Add method ``ArrayString::len`` (now available directly, not just through deref to str).
- Use raw pointers instead of ``&mut [u8]`` for encoding chars into ``ArrayString``
(uninit best practice fix).
- Use raw pointers instead of ``get_unchecked_mut`` where the target may be
uninitialized, everywhere relevant in the ArrayVec implementation
(uninit best practice fix).
- Changed inline hints on many methods, mainly removing inline hints
- ``ArrayVec::dispose`` is now deprecated (it has no purpose anymore)
- 0.4.12
- Use raw pointers instead of ``get_unchecked_mut`` where the target may be
uninitialized, everywhere relevant in the ArrayVec implementation.
- 0.5.0
- Use ``MaybeUninit`` (now unconditionally) in the implementation of
``ArrayVec``
- Use ``MaybeUninit`` (now unconditionally) in the implementation of
``ArrayString``
- The crate feature for serde serialization is now named ``serde``.
- Updated the ``Array`` trait interface, and it is now easier to use for
users outside the crate.
- Add ``FromStr`` impl for ``ArrayString`` by @despawnerer
- Add method ``try_extend_from_slice`` to ``ArrayVec``, which is always
efficient, by @Thomasdezeeuw.
- Add method ``remaining_capacity`` by @Thomasdezeeuw
- Improve performance of the ``extend`` method.
- The index type of zero capacity vectors is now itself zero size, by
@clarfon
- Use ``drop_in_place`` for truncate and clear methods. This affects drop order
and resume from panic during drop.
- Use Rust 2018 edition for the implementation
- Require Rust 1.36 or later, for the unconditional ``MaybeUninit``
improvements.
- 0.4.11
- In Rust 1.36 or later, use newly stable MaybeUninit. This extends the
soundness work introduced in 0.4.9, we are finally able to use this in
stable. We use feature detection (build script) to enable this at build
time.
- 0.4.10
- Use ``repr(C)`` in the ``union`` version that was introduced in 0.4.9, to
allay some soundness concerns.
- 0.4.9
- Use ``union`` in the implementation on when this is detected to be supported
(nightly only for now). This is a better solution for treating uninitialized
regions correctly, and we'll use it in stable Rust as soon as we are able.
When this is enabled, the ``ArrayVec`` has no space overhead in its memory
layout, although the size of the vec should not be relied upon. (See `#114`_)
- ``ArrayString`` updated to not use uninitialized memory, it instead zeros its
backing array. This will be refined in the next version, since we
need to make changes to the user visible API.
- The ``use_union`` feature now does nothing (like its documentation foretold).
.. _`#114`: https://github.com/bluss/arrayvec/pull/114
- 0.4.8
- Implement Clone and Debug for ``IntoIter`` by @clarcharr
- Add more array sizes under crate features. These cover all in the range
up to 128 and 129 to 255 respectively (we have a few of those by default):
- ``array-size-33-128``
- ``array-size-129-255``
- 0.4.7
- Fix future compat warning about raw pointer casts
- Use ``drop_in_place`` when dropping the arrayvec by-value iterator
- Decrease minimum Rust version (see docs) by @jeehoonkang
- 0.3.25
- Fix future compat warning about raw pointer casts
- 0.4.6
- Fix compilation on 16-bit targets. This means, the 65536 array size is not
included on these targets.
- 0.3.24
- Fix compilation on 16-bit targets. This means, the 65536 array size is not
included on these targets.
- Fix license files so that they are both included (was fixed in 0.4 before)
- 0.4.5
- Add methods to ``ArrayString`` by @DenialAdams:
- ``.pop() -> Option<char>``
- ``.truncate(new_len)``
- ``.remove(index) -> char``
- Remove dependency on crate odds
- Document debug assertions in unsafe methods better
- 0.4.4
- Add method ``ArrayVec::truncate()`` by @niklasf
- 0.4.3
- Improve performance for ``ArrayVec::extend`` with a lower level
implementation (#74)
- Small cleanup in dependencies (use no std for crates where we don't need more)
- 0.4.2
- Add constructor method ``new`` to ``CapacityError``.
- 0.4.1
- Add ``Default`` impl to ``ArrayString`` by @tbu-
- 0.4.0
- Reformed signatures and error handling by @bluss and @tbu-:
- ``ArrayVec``'s ``push, insert, remove, swap_remove`` now match ``Vec``'s
corresponding signature and panic on capacity errors where applicable.
- Add fallible methods ``try_push, insert`` and checked methods
``pop_at, swap_pop``.
- Similar changes to ``ArrayString``'s push methods.
- Use a local version of the ``RangeArgument`` trait
- Add array sizes 50, 150, 200 by @daboross
- Support serde 1.0 by @daboross
- New method ``.push_unchecked()`` by @niklasf
- ``ArrayString`` implements ``PartialOrd, Ord`` by @tbu-
- Require Rust 1.14
- crate feature ``use_generic_array`` was dropped.
- 0.3.23
- Implement ``PartialOrd, Ord`` as well as ``PartialOrd<str>`` for
``ArrayString``.
- 0.3.22
- Implement ``Array`` for the 65536 size
- 0.3.21
- Use ``encode_utf8`` from crate odds
- Add constructor ``ArrayString::from_byte_string``
- 0.3.20
- Simplify and speed up ``ArrayString``'s ``.push(char)``.
- 0.3.19
- Add new crate feature ``use_generic_array`` which allows using their
``GenericArray`` just like a regular fixed size array for the storage
of an ``ArrayVec``.
- 0.3.18
- Fix bounds check in ``ArrayVec::insert``!
It would be buggy if ``self.len() < index < self.capacity()``. Take note of
the push out behavior specified in the docs.
- 0.3.17
- Added crate feature ``use_union`` which forwards to the nodrop crate feature
- Added methods ``.is_full()`` to ``ArrayVec`` and ``ArrayString``.
- 0.3.16
- Added method ``.retain()`` to ``ArrayVec``.
- Added methods ``.as_slice(), .as_mut_slice()`` to ``ArrayVec`` and ``.as_str()``
to ``ArrayString``.
- 0.3.15
- Add feature std, which you can opt out of to use ``no_std`` (requires Rust 1.6
to opt out).
- Implement ``Clone::clone_from`` for ArrayVec and ArrayString
- 0.3.14
- Add ``ArrayString::from(&str)``
- 0.3.13
- Added ``DerefMut`` impl for ``ArrayString``.
- Added method ``.simplify()`` to drop the element for ``CapacityError``.
- Added method ``.dispose()`` to ``ArrayVec``
- 0.3.12
- Added ArrayString, a fixed capacity analogy of String
- 0.3.11
- Added trait impls Default, PartialOrd, Ord, Write for ArrayVec
- 0.3.10
- Go back to using external NoDrop, fixing a panic safety bug (issue #3)
- 0.3.8
- Inline the non-dropping logic to remove one drop flag in the
ArrayVec representation.
- 0.3.7
- Added method .into_inner()
- Added unsafe method .set_len()
License
=======
Dual-licensed to be compatible with the Rust project.
Licensed under the Apache License, Version 2.0
http://www.apache.org/licenses/LICENSE-2.0 or the MIT license
http://opensource.org/licenses/MIT, at your
option. This file may not be copied, modified, or distributed
except according to those terms.

View File

@ -0,0 +1,90 @@
extern crate arrayvec;
#[macro_use] extern crate bencher;
use arrayvec::ArrayString;
use bencher::Bencher;
fn try_push_c(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
b.iter(|| {
v.clear();
while v.try_push('c').is_ok() {
}
v.len()
});
b.bytes = v.capacity() as u64;
}
fn try_push_alpha(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
b.iter(|| {
v.clear();
while v.try_push('α').is_ok() {
}
v.len()
});
b.bytes = v.capacity() as u64;
}
// Yes, pushing a string char-by-char is slow. Use .push_str.
fn try_push_string(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
let input = "abcαβγ“”";
b.iter(|| {
v.clear();
for ch in input.chars().cycle() {
if !v.try_push(ch).is_ok() {
break;
}
}
v.len()
});
b.bytes = v.capacity() as u64;
}
fn push_c(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
b.iter(|| {
v.clear();
while !v.is_full() {
v.push('c');
}
v.len()
});
b.bytes = v.capacity() as u64;
}
fn push_alpha(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
b.iter(|| {
v.clear();
while !v.is_full() {
v.push('α');
}
v.len()
});
b.bytes = v.capacity() as u64;
}
fn push_string(b: &mut Bencher) {
let mut v = ArrayString::<[u8; 512]>::new();
let input = "abcαβγ“”";
b.iter(|| {
v.clear();
for ch in input.chars().cycle() {
if !v.is_full() {
v.push(ch);
} else {
break;
}
}
v.len()
});
b.bytes = v.capacity() as u64;
}
benchmark_group!(benches, try_push_c, try_push_alpha, try_push_string, push_c,
push_alpha, push_string);
benchmark_main!(benches);

View File

@ -0,0 +1,78 @@
extern crate arrayvec;
#[macro_use] extern crate bencher;
use std::io::Write;
use arrayvec::ArrayVec;
use bencher::Bencher;
use bencher::black_box;
fn extend_with_constant(b: &mut Bencher) {
let mut v = ArrayVec::<[u8; 512]>::new();
let cap = v.capacity();
b.iter(|| {
v.clear();
let constant = black_box(1);
v.extend((0..cap).map(move |_| constant));
v[511]
});
b.bytes = v.capacity() as u64;
}
fn extend_with_range(b: &mut Bencher) {
let mut v = ArrayVec::<[u8; 512]>::new();
let cap = v.capacity();
b.iter(|| {
v.clear();
let range = 0..cap;
v.extend(range.map(|x| black_box(x as _)));
v[511]
});
b.bytes = v.capacity() as u64;
}
fn extend_with_slice(b: &mut Bencher) {
let mut v = ArrayVec::<[u8; 512]>::new();
let data = [1; 512];
b.iter(|| {
v.clear();
let iter = data.iter().map(|&x| x);
v.extend(iter);
v[511]
});
b.bytes = v.capacity() as u64;
}
fn extend_with_write(b: &mut Bencher) {
let mut v = ArrayVec::<[u8; 512]>::new();
let data = [1; 512];
b.iter(|| {
v.clear();
v.write(&data[..]).ok();
v[511]
});
b.bytes = v.capacity() as u64;
}
fn extend_from_slice(b: &mut Bencher) {
let mut v = ArrayVec::<[u8; 512]>::new();
let data = [1; 512];
b.iter(|| {
v.clear();
v.try_extend_from_slice(&data).ok();
v[511]
});
b.bytes = v.capacity() as u64;
}
benchmark_group!(benches,
extend_with_constant,
extend_with_range,
extend_with_slice,
extend_with_write,
extend_from_slice
);
benchmark_main!(benches);

View File

@ -0,0 +1,25 @@
.docblock pre.rust { background: #eeeeff; }
pre.trait, pre.fn, pre.struct, pre.enum, pre.typedef { background: #fcfefc; }
/* Small “example” label for doc examples */
.docblock pre.rust::before {
content: "example";
float: right;
font-style: italic;
font-size: 0.8em;
margin-top: -10px;
margin-right: -5px;
}
/* Fixup where display in trait listing */
pre.trait .where::before {
content: '\a ';
}
.docblock code {
background-color: inherit;
font-weight: bold;
padding: 0 0.1em;
}

View File

@ -0,0 +1,144 @@
/// Trait for fixed size arrays.
///
/// This trait is implemented for some specific array sizes, see
/// the implementor list below. At the current state of Rust we can't
/// make this fully general for every array size.
///
/// The following crate features add more array sizes (and they are not
/// enabled by default due to their impact on compilation speed).
///
/// - `array-sizes-33-128`: All sizes 33 to 128 are implemented
/// (a few in this range are included by default).
/// - `array-sizes-129-255`: All sizes 129 to 255 are implemented
/// (a few in this range are included by default).
///
/// ## Safety
///
/// This trait can *only* be implemented by fixed-size arrays or types with
/// *exactly* the representation of a fixed size array (of the right element
/// type and capacity).
///
/// Normally this trait is an implementation detail of arrayvec and doesn't
/// need implementing.
pub unsafe trait Array {
/// The array's element type
type Item;
/// The smallest type that can index and tell the length of the array.
#[doc(hidden)]
type Index: Index;
/// The array's element capacity
const CAPACITY: usize;
fn as_slice(&self) -> &[Self::Item];
fn as_mut_slice(&mut self) -> &mut [Self::Item];
}
pub trait Index : PartialEq + Copy {
fn to_usize(self) -> usize;
fn from(_: usize) -> Self;
}
impl Index for () {
#[inline(always)]
fn to_usize(self) -> usize { 0 }
#[inline(always)]
fn from(_ix: usize) -> Self { () }
}
impl Index for bool {
#[inline(always)]
fn to_usize(self) -> usize { self as usize }
#[inline(always)]
fn from(ix: usize) -> Self { ix != 0 }
}
impl Index for u8 {
#[inline(always)]
fn to_usize(self) -> usize { self as usize }
#[inline(always)]
fn from(ix: usize) -> Self { ix as u8 }
}
impl Index for u16 {
#[inline(always)]
fn to_usize(self) -> usize { self as usize }
#[inline(always)]
fn from(ix: usize) -> Self { ix as u16 }
}
impl Index for u32 {
#[inline(always)]
fn to_usize(self) -> usize { self as usize }
#[inline(always)]
fn from(ix: usize) -> Self { ix as u32 }
}
impl Index for usize {
#[inline(always)]
fn to_usize(self) -> usize { self }
#[inline(always)]
fn from(ix: usize) -> Self { ix }
}
macro_rules! fix_array_impl {
($index_type:ty, $len:expr ) => (
unsafe impl<T> Array for [T; $len] {
type Item = T;
type Index = $index_type;
const CAPACITY: usize = $len;
#[doc(hidden)]
fn as_slice(&self) -> &[Self::Item] { self }
#[doc(hidden)]
fn as_mut_slice(&mut self) -> &mut [Self::Item] { self }
}
)
}
macro_rules! fix_array_impl_recursive {
($index_type:ty, ) => ();
($index_type:ty, $($len:expr,)*) => (
$(fix_array_impl!($index_type, $len);)*
);
}
fix_array_impl_recursive!((), 0,);
fix_array_impl_recursive!(bool, 1,);
fix_array_impl_recursive!(u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, );
#[cfg(not(feature="array-sizes-33-128"))]
fix_array_impl_recursive!(u8, 32, 40, 48, 50, 56, 64, 72, 96, 100, 128, );
#[cfg(feature="array-sizes-33-128")]
fix_array_impl_recursive!(u8,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
125, 126, 127, 128,
);
#[cfg(not(feature="array-sizes-129-255"))]
fix_array_impl_recursive!(u8, 160, 192, 200, 224,);
#[cfg(feature="array-sizes-129-255")]
fix_array_impl_recursive!(u8,
129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172,
173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204,
205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236,
237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
253, 254, 255,
);
fix_array_impl_recursive!(u16, 256, 384, 512, 768, 1024, 2048, 4096, 8192, 16384, 32768,);
// This array size doesn't exist on 16-bit
#[cfg(any(target_pointer_width="32", target_pointer_width="64"))]
fix_array_impl_recursive!(u32, 1 << 16,);
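// A small illustrative check (not part of upstream arrayvec): the `Index`
// associated type above is the smallest integer type that can count up to the
// array's capacity, which is what keeps small ArrayVecs compact. The sizes
// asserted below follow from the `fix_array_impl_recursive!` invocations in
// this file.
#[cfg(test)]
mod index_type_sketch {
    use super::Array;

    fn index_size<A: Array>() -> usize {
        core::mem::size_of::<A::Index>()
    }

    #[test]
    fn index_type_tracks_capacity() {
        assert_eq!(index_size::<[u8; 0]>(), 0);   // ()
        assert_eq!(index_size::<[u8; 4]>(), 1);   // u8
        assert_eq!(index_size::<[u8; 256]>(), 2); // u16
    }
}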

View File

@ -0,0 +1,567 @@
use std::borrow::Borrow;
use std::cmp;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ptr;
use std::ops::{Deref, DerefMut};
use std::str;
use std::str::FromStr;
use std::str::Utf8Error;
use std::slice;
use crate::array::Array;
use crate::array::Index;
use crate::CapacityError;
use crate::char::encode_utf8;
#[cfg(feature="serde")]
use serde::{Serialize, Deserialize, Serializer, Deserializer};
use super::MaybeUninit as MaybeUninitCopy;
/// A string with a fixed capacity.
///
/// The `ArrayString` is a string backed by a fixed size array. It keeps track
/// of its length.
///
/// The string is a contiguous value that you can store directly on the stack
/// if needed.
#[derive(Copy)]
pub struct ArrayString<A>
where A: Array<Item=u8> + Copy
{
xs: MaybeUninitCopy<A>,
len: A::Index,
}
impl<A> Default for ArrayString<A>
where A: Array<Item=u8> + Copy
{
/// Return an empty `ArrayString`
fn default() -> ArrayString<A> {
ArrayString::new()
}
}
impl<A> ArrayString<A>
where A: Array<Item=u8> + Copy
{
/// Create a new empty `ArrayString`.
///
/// Capacity is inferred from the type parameter.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 16]>::new();
/// string.push_str("foo");
/// assert_eq!(&string[..], "foo");
/// assert_eq!(string.capacity(), 16);
/// ```
pub fn new() -> ArrayString<A> {
unsafe {
ArrayString {
xs: MaybeUninitCopy::uninitialized(),
len: Index::from(0),
}
}
}
/// Return the length of the string.
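///
/// An illustrative example (not from the upstream docs), in the style of the
/// other doc examples in this file:
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 16]>::new();
/// string.push_str("foo");
/// assert_eq!(string.len(), 3);
/// ```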
#[inline]
pub fn len(&self) -> usize { self.len.to_usize() }
/// Create a new `ArrayString` from a `str`.
///
/// Capacity is inferred from the type parameter.
///
/// **Errors** if the backing array is not large enough to fit the string.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 3]>::from("foo").unwrap();
/// assert_eq!(&string[..], "foo");
/// assert_eq!(string.len(), 3);
/// assert_eq!(string.capacity(), 3);
/// ```
pub fn from(s: &str) -> Result<Self, CapacityError<&str>> {
let mut arraystr = Self::new();
arraystr.try_push_str(s)?;
Ok(arraystr)
}
/// Create a new `ArrayString` from a byte string literal.
///
/// **Errors** if the byte string literal is not valid UTF-8.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let string = ArrayString::from_byte_string(b"hello world").unwrap();
/// ```
pub fn from_byte_string(b: &A) -> Result<Self, Utf8Error> {
let len = str::from_utf8(b.as_slice())?.len();
debug_assert_eq!(len, A::CAPACITY);
Ok(ArrayString {
xs: MaybeUninitCopy::from(*b),
len: Index::from(A::CAPACITY),
})
}
/// Return the capacity of the `ArrayString`.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let string = ArrayString::<[_; 3]>::new();
/// assert_eq!(string.capacity(), 3);
/// ```
#[inline(always)]
pub fn capacity(&self) -> usize { A::CAPACITY }
/// Return if the `ArrayString` is completely filled.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 1]>::new();
/// assert!(!string.is_full());
/// string.push_str("A");
/// assert!(string.is_full());
/// ```
pub fn is_full(&self) -> bool { self.len() == self.capacity() }
/// Adds the given char to the end of the string.
///
/// ***Panics*** if the backing array is not large enough to fit the additional char.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 2]>::new();
///
/// string.push('a');
/// string.push('b');
///
/// assert_eq!(&string[..], "ab");
/// ```
pub fn push(&mut self, c: char) {
self.try_push(c).unwrap();
}
/// Adds the given char to the end of the string.
///
/// Returns `Ok` if the push succeeds.
///
/// **Errors** if the backing array is not large enough to fit the additional char.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 2]>::new();
///
/// string.try_push('a').unwrap();
/// string.try_push('b').unwrap();
/// let overflow = string.try_push('c');
///
/// assert_eq!(&string[..], "ab");
/// assert_eq!(overflow.unwrap_err().element(), 'c');
/// ```
pub fn try_push(&mut self, c: char) -> Result<(), CapacityError<char>> {
let len = self.len();
unsafe {
let ptr = self.xs.ptr_mut().add(len);
let remaining_cap = self.capacity() - len;
match encode_utf8(c, ptr, remaining_cap) {
Ok(n) => {
self.set_len(len + n);
Ok(())
}
Err(_) => Err(CapacityError::new(c)),
}
}
}
/// Adds the given string slice to the end of the string.
///
/// ***Panics*** if the backing array is not large enough to fit the string.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 2]>::new();
///
/// string.push_str("a");
/// string.push_str("d");
///
/// assert_eq!(&string[..], "ad");
/// ```
pub fn push_str(&mut self, s: &str) {
self.try_push_str(s).unwrap()
}
/// Adds the given string slice to the end of the string.
///
/// Returns `Ok` if the push succeeds.
///
/// **Errors** if the backing array is not large enough to fit the string.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 2]>::new();
///
/// string.try_push_str("a").unwrap();
/// let overflow1 = string.try_push_str("bc");
/// string.try_push_str("d").unwrap();
/// let overflow2 = string.try_push_str("ef");
///
/// assert_eq!(&string[..], "ad");
/// assert_eq!(overflow1.unwrap_err().element(), "bc");
/// assert_eq!(overflow2.unwrap_err().element(), "ef");
/// ```
pub fn try_push_str<'a>(&mut self, s: &'a str) -> Result<(), CapacityError<&'a str>> {
if s.len() > self.capacity() - self.len() {
return Err(CapacityError::new(s));
}
unsafe {
let dst = self.xs.ptr_mut().offset(self.len() as isize);
let src = s.as_ptr();
ptr::copy_nonoverlapping(src, dst, s.len());
let newl = self.len() + s.len();
self.set_len(newl);
}
Ok(())
}
/// Removes the last character from the string and returns it.
///
/// Returns `None` if this `ArrayString` is empty.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut s = ArrayString::<[_; 3]>::from("foo").unwrap();
///
/// assert_eq!(s.pop(), Some('o'));
/// assert_eq!(s.pop(), Some('o'));
/// assert_eq!(s.pop(), Some('f'));
///
/// assert_eq!(s.pop(), None);
/// ```
pub fn pop(&mut self) -> Option<char> {
let ch = match self.chars().rev().next() {
Some(ch) => ch,
None => return None,
};
let new_len = self.len() - ch.len_utf8();
unsafe {
self.set_len(new_len);
}
Some(ch)
}
/// Shortens this `ArrayString` to the specified length.
///
/// If `new_len` is greater than the string's current length, this has no
/// effect.
///
/// ***Panics*** if `new_len` does not lie on a `char` boundary.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 6]>::from("foobar").unwrap();
/// string.truncate(3);
/// assert_eq!(&string[..], "foo");
/// string.truncate(4);
/// assert_eq!(&string[..], "foo");
/// ```
pub fn truncate(&mut self, new_len: usize) {
if new_len <= self.len() {
assert!(self.is_char_boundary(new_len));
unsafe {
// In libstd truncate is called on the underlying vector,
// which in turns drops each element.
// As we know we don't have to worry about Drop,
// we can just set the length (a la clear.)
self.set_len(new_len);
}
}
}
/// Removes a `char` from this `ArrayString` at a byte position and returns it.
///
/// This is an `O(n)` operation, as it requires copying every element in the
/// array.
///
/// ***Panics*** if `idx` is larger than or equal to the `ArrayString`'s length,
/// or if it does not lie on a `char` boundary.
///
/// ```
/// use arrayvec::ArrayString;
///
/// let mut s = ArrayString::<[_; 3]>::from("foo").unwrap();
///
/// assert_eq!(s.remove(0), 'f');
/// assert_eq!(s.remove(1), 'o');
/// assert_eq!(s.remove(0), 'o');
/// ```
pub fn remove(&mut self, idx: usize) -> char {
let ch = match self[idx..].chars().next() {
Some(ch) => ch,
None => panic!("cannot remove a char from the end of a string"),
};
let next = idx + ch.len_utf8();
let len = self.len();
unsafe {
ptr::copy(self.xs.ptr().offset(next as isize),
self.xs.ptr_mut().offset(idx as isize),
len - next);
self.set_len(len - (next - idx));
}
ch
}
/// Make the string empty.
pub fn clear(&mut self) {
unsafe {
self.set_len(0);
}
}
/// Set the string's length.
///
/// This function is `unsafe` because it changes the notion of the
/// number of “valid” bytes in the string. Use with care.
///
/// This method uses *debug assertions* to check the validity of `length`
/// and may use other debug assertions.
pub unsafe fn set_len(&mut self, length: usize) {
debug_assert!(length <= self.capacity());
self.len = Index::from(length);
}
/// Return a string slice of the whole `ArrayString`.
pub fn as_str(&self) -> &str {
self
}
}
impl<A> Deref for ArrayString<A>
where A: Array<Item=u8> + Copy
{
type Target = str;
#[inline]
fn deref(&self) -> &str {
unsafe {
let sl = slice::from_raw_parts(self.xs.ptr(), self.len.to_usize());
str::from_utf8_unchecked(sl)
}
}
}
impl<A> DerefMut for ArrayString<A>
where A: Array<Item=u8> + Copy
{
#[inline]
fn deref_mut(&mut self) -> &mut str {
unsafe {
let sl = slice::from_raw_parts_mut(self.xs.ptr_mut(), self.len.to_usize());
str::from_utf8_unchecked_mut(sl)
}
}
}
impl<A> PartialEq for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn eq(&self, rhs: &Self) -> bool {
**self == **rhs
}
}
impl<A> PartialEq<str> for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn eq(&self, rhs: &str) -> bool {
&**self == rhs
}
}
impl<A> PartialEq<ArrayString<A>> for str
where A: Array<Item=u8> + Copy
{
fn eq(&self, rhs: &ArrayString<A>) -> bool {
self == &**rhs
}
}
impl<A> Eq for ArrayString<A>
where A: Array<Item=u8> + Copy
{ }
impl<A> Hash for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn hash<H: Hasher>(&self, h: &mut H) {
(**self).hash(h)
}
}
impl<A> Borrow<str> for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn borrow(&self) -> &str { self }
}
impl<A> AsRef<str> for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn as_ref(&self) -> &str { self }
}
impl<A> fmt::Debug for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { (**self).fmt(f) }
}
impl<A> fmt::Display for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { (**self).fmt(f) }
}
/// `Write` appends written data to the end of the string.
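///
/// An illustrative example (not from the upstream docs), in the style of the
/// other doc examples in this file:
///
/// ```
/// use std::fmt::Write;
/// use arrayvec::ArrayString;
///
/// let mut string = ArrayString::<[_; 16]>::new();
/// write!(string, "{}{}", "ab", 1).unwrap();
/// assert_eq!(&string[..], "ab1");
/// ```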
impl<A> fmt::Write for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn write_char(&mut self, c: char) -> fmt::Result {
self.try_push(c).map_err(|_| fmt::Error)
}
fn write_str(&mut self, s: &str) -> fmt::Result {
self.try_push_str(s).map_err(|_| fmt::Error)
}
}
impl<A> Clone for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn clone(&self) -> ArrayString<A> {
*self
}
fn clone_from(&mut self, rhs: &Self) {
// guaranteed to fit due to types matching.
self.clear();
self.try_push_str(rhs).ok();
}
}
impl<A> PartialOrd for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn partial_cmp(&self, rhs: &Self) -> Option<cmp::Ordering> {
(**self).partial_cmp(&**rhs)
}
fn lt(&self, rhs: &Self) -> bool { **self < **rhs }
fn le(&self, rhs: &Self) -> bool { **self <= **rhs }
fn gt(&self, rhs: &Self) -> bool { **self > **rhs }
fn ge(&self, rhs: &Self) -> bool { **self >= **rhs }
}
impl<A> PartialOrd<str> for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn partial_cmp(&self, rhs: &str) -> Option<cmp::Ordering> {
(**self).partial_cmp(rhs)
}
fn lt(&self, rhs: &str) -> bool { &**self < rhs }
fn le(&self, rhs: &str) -> bool { &**self <= rhs }
fn gt(&self, rhs: &str) -> bool { &**self > rhs }
fn ge(&self, rhs: &str) -> bool { &**self >= rhs }
}
impl<A> PartialOrd<ArrayString<A>> for str
where A: Array<Item=u8> + Copy
{
fn partial_cmp(&self, rhs: &ArrayString<A>) -> Option<cmp::Ordering> {
self.partial_cmp(&**rhs)
}
fn lt(&self, rhs: &ArrayString<A>) -> bool { self < &**rhs }
fn le(&self, rhs: &ArrayString<A>) -> bool { self <= &**rhs }
fn gt(&self, rhs: &ArrayString<A>) -> bool { self > &**rhs }
fn ge(&self, rhs: &ArrayString<A>) -> bool { self >= &**rhs }
}
impl<A> Ord for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn cmp(&self, rhs: &Self) -> cmp::Ordering {
(**self).cmp(&**rhs)
}
}
impl<A> FromStr for ArrayString<A>
where A: Array<Item=u8> + Copy
{
type Err = CapacityError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::from(s).map_err(CapacityError::simplify)
}
}
#[cfg(feature="serde")]
/// Requires crate feature `"serde"`
impl<A> Serialize for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer
{
serializer.serialize_str(&*self)
}
}
#[cfg(feature="serde")]
/// Requires crate feature `"serde"`
impl<'de, A> Deserialize<'de> for ArrayString<A>
where A: Array<Item=u8> + Copy
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>
{
use serde::de::{self, Visitor};
use std::marker::PhantomData;
struct ArrayStringVisitor<A: Array<Item=u8>>(PhantomData<A>);
impl<'de, A: Copy + Array<Item=u8>> Visitor<'de> for ArrayStringVisitor<A> {
type Value = ArrayString<A>;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
write!(formatter, "a string no more than {} bytes long", A::CAPACITY)
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where E: de::Error,
{
ArrayString::from(v).map_err(|_| E::invalid_length(v.len(), &self))
}
fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
where E: de::Error,
{
let s = str::from_utf8(v).map_err(|_| E::invalid_value(de::Unexpected::Bytes(v), &self))?;
ArrayString::from(s).map_err(|_| E::invalid_length(s.len(), &self))
}
}
deserializer.deserialize_str(ArrayStringVisitor::<A>(PhantomData))
}
}

View File

@ -0,0 +1,98 @@
// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// Original authors: alexchrichton, bluss
use std::ptr;
// UTF-8 ranges and tags for encoding characters
const TAG_CONT: u8 = 0b1000_0000;
const TAG_TWO_B: u8 = 0b1100_0000;
const TAG_THREE_B: u8 = 0b1110_0000;
const TAG_FOUR_B: u8 = 0b1111_0000;
const MAX_ONE_B: u32 = 0x80;
const MAX_TWO_B: u32 = 0x800;
const MAX_THREE_B: u32 = 0x10000;
/// Placeholder
pub struct EncodeUtf8Error;
#[inline]
unsafe fn write(ptr: *mut u8, index: usize, byte: u8) {
ptr::write(ptr.add(index), byte)
}
/// Encode a char into buf using UTF-8.
///
/// On success, return the byte length of the encoding (1, 2, 3 or 4).<br>
/// On error, return `EncodeUtf8Error` if the buffer was too short for the char.
///
/// Safety: `ptr` must be writable for `len` bytes.
#[inline]
pub unsafe fn encode_utf8(ch: char, ptr: *mut u8, len: usize) -> Result<usize, EncodeUtf8Error>
{
let code = ch as u32;
if code < MAX_ONE_B && len >= 1 {
write(ptr, 0, code as u8);
return Ok(1);
} else if code < MAX_TWO_B && len >= 2 {
write(ptr, 0, (code >> 6 & 0x1F) as u8 | TAG_TWO_B);
write(ptr, 1, (code & 0x3F) as u8 | TAG_CONT);
return Ok(2);
} else if code < MAX_THREE_B && len >= 3 {
write(ptr, 0, (code >> 12 & 0x0F) as u8 | TAG_THREE_B);
write(ptr, 1, (code >> 6 & 0x3F) as u8 | TAG_CONT);
write(ptr, 2, (code & 0x3F) as u8 | TAG_CONT);
return Ok(3);
} else if len >= 4 {
write(ptr, 0, (code >> 18 & 0x07) as u8 | TAG_FOUR_B);
write(ptr, 1, (code >> 12 & 0x3F) as u8 | TAG_CONT);
write(ptr, 2, (code >> 6 & 0x3F) as u8 | TAG_CONT);
write(ptr, 3, (code & 0x3F) as u8 | TAG_CONT);
return Ok(4);
};
Err(EncodeUtf8Error)
}
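// Illustrative test (not part of upstream arrayvec): exercises the contract
// documented on `encode_utf8` above -- on success the returned value is the
// number of bytes written, and a buffer that is too short yields an error.
#[test]
fn encode_utf8_sketch() {
    let mut buf = [0u8; 4];
    unsafe {
        // 'é' encodes to two bytes; a 4-byte buffer is plenty.
        assert_eq!(encode_utf8('é', buf.as_mut_ptr(), buf.len()).ok(), Some(2));
        // A zero-length buffer cannot hold any encoding.
        assert!(encode_utf8('a', buf.as_mut_ptr(), 0).is_err());
    }
    assert_eq!(&buf[..2], "é".as_bytes());
}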
#[test]
fn test_encode_utf8() {
// Test that all codepoints are encoded correctly
let mut data = [0u8; 16];
for codepoint in 0..=(std::char::MAX as u32) {
if let Some(ch) = std::char::from_u32(codepoint) {
for elt in &mut data { *elt = 0; }
let ptr = data.as_mut_ptr();
let len = data.len();
unsafe {
let res = encode_utf8(ch, ptr, len).ok().unwrap();
assert_eq!(res, ch.len_utf8());
}
let string = std::str::from_utf8(&data).unwrap();
assert_eq!(string.chars().next(), Some(ch));
}
}
}
#[test]
fn test_encode_utf8_oob() {
// test that we report oob if the buffer is too short
let mut data = [0u8; 16];
let chars = ['a', 'α', '€', '𐍈'];
for (len, &ch) in (1..=4).zip(&chars) {
assert_eq!(len, ch.len_utf8(), "Len of ch={}", ch);
let ptr = data.as_mut_ptr();
unsafe {
assert!(matches::matches!(encode_utf8(ch, ptr, len - 1), Err(_)));
assert!(matches::matches!(encode_utf8(ch, ptr, len), Ok(_)));
}
}
}

View File

@ -0,0 +1,53 @@
use std::fmt;
#[cfg(feature="std")]
use std::any::Any;
#[cfg(feature="std")]
use std::error::Error;
/// Error value indicating insufficient capacity
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)]
pub struct CapacityError<T = ()> {
element: T,
}
impl<T> CapacityError<T> {
/// Create a new `CapacityError` from `element`.
pub fn new(element: T) -> CapacityError<T> {
CapacityError {
element: element,
}
}
/// Extract the overflowing element
pub fn element(self) -> T {
self.element
}
/// Convert into a `CapacityError` that does not carry an element.
pub fn simplify(self) -> CapacityError {
CapacityError { element: () }
}
}
const CAPERROR: &'static str = "insufficient capacity";
#[cfg(feature="std")]
/// Requires `features="std"`.
impl<T: Any> Error for CapacityError<T> {
fn description(&self) -> &str {
CAPERROR
}
}
impl<T> fmt::Display for CapacityError<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", CAPERROR)
}
}
impl<T> fmt::Debug for CapacityError<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}: {}", "CapacityError", CAPERROR)
}
}
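// Illustrative test (not part of upstream arrayvec): shows how callers usually
// meet `CapacityError`, via the fallible `try_push` on a full `ArrayVec`;
// `element()` recovers the rejected value and `simplify()` drops it. Gated on
// the `std` feature because it formats the error as a `String`.
#[cfg(all(test, feature = "std"))]
mod capacity_error_sketch {
    use super::CapacityError;
    use crate::ArrayVec;

    #[test]
    fn try_push_reports_the_rejected_element() {
        let mut v = ArrayVec::from([1u8]); // already full: capacity 1, length 1
        let err = v.try_push(7).unwrap_err();
        assert_eq!(format!("{}", err), "insufficient capacity");
        assert_eq!(err.element(), 7);

        let plain: CapacityError = v.try_push(9).unwrap_err().simplify();
        assert_eq!(plain.to_string(), "insufficient capacity");
    }
}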

File diff suppressed because it is too large

View File

@ -0,0 +1,44 @@
use crate::array::Array;
use std::mem::MaybeUninit as StdMaybeUninit;
#[derive(Copy)]
pub struct MaybeUninit<T> {
inner: StdMaybeUninit<T>,
}
impl<T> Clone for MaybeUninit<T>
where T: Copy
{
fn clone(&self) -> Self { *self }
}
impl<T> MaybeUninit<T> {
/// Create a new MaybeUninit with uninitialized interior
pub unsafe fn uninitialized() -> Self {
MaybeUninit { inner: StdMaybeUninit::uninit() }
}
/// Create a new MaybeUninit from the value `v`.
pub fn from(v: T) -> Self {
MaybeUninit { inner: StdMaybeUninit::new(v) }
}
// Raw pointer casts written so that we don't reference or access the
// uninitialized interior value
/// Return a raw pointer to the start of the interior array
pub fn ptr(&self) -> *const T::Item
where T: Array
{
self.inner.as_ptr() as *const T::Item
}
/// Return a mut raw pointer to the start of the interior array
pub fn ptr_mut(&mut self) -> *mut T::Item
where T: Array
{
self.inner.as_mut_ptr() as *mut T::Item
}
}
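// Illustrative test (not part of upstream arrayvec): a fully initialized value
// created with `from` can be read back through the raw pointer accessors, so
// the accessors above really do point at the interior array.
#[cfg(test)]
mod sketch {
    use super::MaybeUninit;

    #[test]
    fn from_then_read_back() {
        let m = MaybeUninit::from([1u8, 2, 3, 4]);
        // Safe to read here: every element was initialized by `from`.
        unsafe {
            assert_eq!(*m.ptr(), 1);
            assert_eq!(*m.ptr().add(3), 4);
        }
    }
}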

View File

@ -0,0 +1,79 @@
#![cfg(feature = "serde")]
extern crate arrayvec;
extern crate serde_test;
mod array_vec {
use arrayvec::ArrayVec;
use serde_test::{Token, assert_tokens, assert_de_tokens_error};
#[test]
fn test_ser_de_empty() {
let vec = ArrayVec::<[u32; 0]>::new();
assert_tokens(&vec, &[
Token::Seq { len: Some(0) },
Token::SeqEnd,
]);
}
#[test]
fn test_ser_de() {
let mut vec = ArrayVec::<[u32; 3]>::new();
vec.push(20);
vec.push(55);
vec.push(123);
assert_tokens(&vec, &[
Token::Seq { len: Some(3) },
Token::U32(20),
Token::U32(55),
Token::U32(123),
Token::SeqEnd,
]);
}
#[test]
fn test_de_too_large() {
assert_de_tokens_error::<ArrayVec<[u32; 2]>>(&[
Token::Seq { len: Some(3) },
Token::U32(13),
Token::U32(42),
Token::U32(68),
], "invalid length 3, expected an array with no more than 2 items");
}
}
mod array_string {
use arrayvec::ArrayString;
use serde_test::{Token, assert_tokens, assert_de_tokens_error};
#[test]
fn test_ser_de_empty() {
let string = ArrayString::<[u8; 0]>::new();
assert_tokens(&string, &[
Token::Str(""),
]);
}
#[test]
fn test_ser_de() {
let string = ArrayString::<[u8; 9]>::from("1234 abcd")
.expect("expected exact specified capacity to be enough");
assert_tokens(&string, &[
Token::Str("1234 abcd"),
]);
}
#[test]
fn test_de_too_large() {
assert_de_tokens_error::<ArrayString<[u8; 2]>>(&[
Token::Str("afd")
], "invalid length 3, expected a string no more than 2 bytes long");
}
}

Some files were not shown because too many files have changed in this diff