third_party/cargo: add mlua

master
q3k 2021-04-03 16:35:31 +00:00
parent 07436668f4
commit 2be6a7be48
164 changed files with 36055 additions and 0 deletions

View File

@ -57,6 +57,15 @@ alias(
],
)
# Short-name alias: lets other packages refer to the vendored mlua 0.5.3
# library through this package's "mlua" label instead of the versioned
# vendor path.
alias(
name = "mlua",
actual = "//third_party/cargo/vendor/mlua-0.5.3:mlua",
tags = [
"cargo-raze",
"manual",
],
)
alias(
name = "proc_macro2",
actual = "//third_party/cargo/vendor/proc-macro2-1.0.26:proc_macro2",

23
third_party/cargo/Cargo.lock generated vendored
View File

@ -78,6 +78,15 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a"
[[package]]
name = "bstr"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a40b47ad93e1a5404e6c18dec46b628214fee441c70f4ab5d6942142cc268a3d"
dependencies = [
"memchr",
]
[[package]]
name = "bytemuck"
version = "1.5.1"
@ -189,6 +198,7 @@ dependencies = [
"flatbuffers",
"image",
"log",
"mlua",
"proc-macro2",
"quote",
"syn",
@ -839,6 +849,19 @@ dependencies = [
"ws2_32-sys",
]
[[package]]
name = "mlua"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f2fc8e1085d53b72898c59ceee1980b5826b0c98ce99886b7518f0ead00e5cb"
dependencies = [
"bstr",
"cc",
"lazy_static",
"num-traits",
"pkg-config",
]
[[package]]
name = "ndk"
version = "0.2.1"

View File

@ -19,6 +19,7 @@ flatbuffers = "0.6.1"
quote = "1.0.8"
syn = "1.0.58"
proc-macro2 = "1"
mlua = { version = "0.5", features = [ "luajit" ] }
[workspace.metadata.raze]
workspace_path = "//third_party/cargo"
@ -72,3 +73,6 @@ gen_buildrs = true
[workspace.metadata.raze.crates.proc-macro2.'1.0.26']
gen_buildrs = true
[workspace.metadata.raze.crates.mlua.'0.5.3']
gen_buildrs = true

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,71 @@
"""
@generated
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
# buildifier: disable=load
load(
"@rules_rust//rust:rust.bzl",
"rust_binary",
"rust_library",
"rust_test",
)
# buildifier: disable=load
load("@bazel_skylib//lib:selects.bzl", "selects")
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # MIT from expression "MIT OR Apache-2.0"
])
# Generated Targets
# Unsupported target "graphemes" with type "example" omitted
# Unsupported target "graphemes-std" with type "example" omitted
# Unsupported target "lines" with type "example" omitted
# Unsupported target "lines-std" with type "example" omitted
# Unsupported target "uppercase" with type "example" omitted
# Unsupported target "uppercase-std" with type "example" omitted
# Unsupported target "words" with type "example" omitted
# Unsupported target "words-std" with type "example" omitted
# Library target for the vendored bstr 0.2.15 crate. It compiles every .rs
# file under this package with only the "std" crate feature enabled, and its
# sole dependency is the vendored memchr 2.3.4 crate.
rust_library(
name = "bstr",
srcs = glob(["**/*.rs"]),
crate_features = [
"std",
],
crate_root = "src/lib.rs",
crate_type = "lib",
data = [],
edition = "2015",
# Lints are capped at "allow" so warnings in this third-party code are not
# reported.
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"manual",
],
version = "0.2.15",
# buildifier: leave-alone
deps = [
"//third_party/cargo/vendor/memchr-2.3.4:memchr",
],
)

View File

@ -0,0 +1,8 @@
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
http://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT](LICENSE-MIT) or
http://opensource.org/licenses/MIT)
at your option.

134
third_party/cargo/vendor/bstr-0.2.15/Cargo.lock generated vendored Normal file
View File

@ -0,0 +1,134 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "bstr"
version = "0.2.15"
dependencies = [
"lazy_static",
"memchr",
"quickcheck",
"regex-automata",
"serde",
"ucd-parse",
"unicode-segmentation",
]
[[package]]
name = "byteorder"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "getrandom"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ccac4b00700875e6a07c6cde370d44d32fa01c5a65cdd2fca6858c479d28bb3"
[[package]]
name = "memchr"
version = "2.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
[[package]]
name = "quickcheck"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
dependencies = [
"rand",
]
[[package]]
name = "rand"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c026d7df8b298d90ccbbc5190bd04d85e159eaf5576caeacf8741da93ccbd2e5"
dependencies = [
"getrandom",
]
[[package]]
name = "regex"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a"
dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"
dependencies = [
"byteorder",
]
[[package]]
name = "regex-syntax"
version = "0.6.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581"
[[package]]
name = "serde"
version = "1.0.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae"
[[package]]
name = "ucd-parse"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5269f8d35df6b8b60758343a6d742ecf09e4bca13faee32af5503aebd1e11b7c"
dependencies = [
"lazy_static",
"regex",
]
[[package]]
name = "unicode-segmentation"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796"
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"

View File

@ -0,0 +1,68 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "bstr"
version = "0.2.15"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
exclude = ["/.github"]
description = "A string type that is not required to be valid UTF-8."
homepage = "https://github.com/BurntSushi/bstr"
documentation = "https://docs.rs/bstr"
readme = "README.md"
keywords = ["string", "str", "byte", "bytes", "text"]
categories = ["text-processing", "encoding"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/BurntSushi/bstr"
[profile.release]
debug = true
[lib]
bench = false
[dependencies.lazy_static]
version = "1.2"
optional = true
[dependencies.memchr]
version = "2.1.2"
default-features = false
[dependencies.regex-automata]
version = "0.1.5"
optional = true
default-features = false
[dependencies.serde]
version = "1.0.85"
optional = true
default-features = false
[dev-dependencies.quickcheck]
version = "1"
default-features = false
[dev-dependencies.ucd-parse]
version = "0.1.3"
[dev-dependencies.unicode-segmentation]
version = "1.2.1"
[features]
default = ["std", "unicode"]
serde1 = ["std", "serde1-nostd", "serde/std"]
serde1-nostd = ["serde"]
std = ["memchr/use_std"]
unicode = ["lazy_static", "regex-automata"]
[badges.appveyor]
repository = "BurntSushi/bstr"
[badges.travis-ci]
repository = "BurntSushi/bstr"

View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2018-2019 Andrew Gallant
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@ -0,0 +1,254 @@
bstr
====
This crate provides extension traits for `&[u8]` and `Vec<u8>` that enable
their use as byte strings, where byte strings are _conventionally_ UTF-8. This
differs from the standard library's `String` and `str` types in that they are
not required to be valid UTF-8, but may be fully or partially valid UTF-8.
[![Build status](https://github.com/BurntSushi/bstr/workflows/ci/badge.svg)](https://github.com/BurntSushi/bstr/actions)
[![](https://meritbadge.herokuapp.com/bstr)](https://crates.io/crates/bstr)
### Documentation
https://docs.rs/bstr
### When should I use byte strings?
See this part of the documentation for more details:
https://docs.rs/bstr/0.2.*/bstr/#when-should-i-use-byte-strings.
The short story is that byte strings are useful when it is inconvenient or
incorrect to require valid UTF-8.
### Usage
Add this to your `Cargo.toml`:
```toml
[dependencies]
bstr = "0.2"
```
### Examples
The following examples exhibit both the API features of byte strings and
the I/O convenience functions provided for reading line-by-line quickly.
This first example simply shows how to efficiently iterate over lines in
stdin, and print out lines containing a particular substring:
```rust
use std::error::Error;
use std::io::{self, Write};
use bstr::{ByteSlice, io::BufReadExt};
fn main() -> Result<(), Box<dyn Error>> {
let stdin = io::stdin();
let mut stdout = io::BufWriter::new(io::stdout());
stdin.lock().for_byte_line_with_terminator(|line| {
if line.contains_str("Dimension") {
stdout.write_all(line)?;
}
Ok(true)
})?;
Ok(())
}
```
This example shows how to count all of the words (Unicode-aware) in stdin,
line-by-line:
```rust
use std::error::Error;
use std::io;
use bstr::{ByteSlice, io::BufReadExt};
fn main() -> Result<(), Box<dyn Error>> {
let stdin = io::stdin();
let mut words = 0;
stdin.lock().for_byte_line_with_terminator(|line| {
words += line.words().count();
Ok(true)
})?;
println!("{}", words);
Ok(())
}
```
This example shows how to convert a stream on stdin to uppercase without
performing UTF-8 validation _and_ while amortizing allocation. On standard ASCII
text, this is quite a bit faster than what you can (easily) do with standard
library APIs. (N.B. Any invalid UTF-8 bytes are passed through unchanged.)
```rust
use std::error::Error;
use std::io::{self, Write};
use bstr::{ByteSlice, io::BufReadExt};
fn main() -> Result<(), Box<dyn Error>> {
let stdin = io::stdin();
let mut stdout = io::BufWriter::new(io::stdout());
let mut upper = vec![];
stdin.lock().for_byte_line_with_terminator(|line| {
upper.clear();
line.to_uppercase_into(&mut upper);
stdout.write_all(&upper)?;
Ok(true)
})?;
Ok(())
}
```
This example shows how to extract the first 10 visual characters (as grapheme
clusters) from each line, where invalid UTF-8 sequences are generally treated
as a single character and are passed through correctly:
```rust
use std::error::Error;
use std::io::{self, Write};
use bstr::{ByteSlice, io::BufReadExt};
fn main() -> Result<(), Box<dyn Error>> {
let stdin = io::stdin();
let mut stdout = io::BufWriter::new(io::stdout());
stdin.lock().for_byte_line_with_terminator(|line| {
let end = line
.grapheme_indices()
.map(|(_, end, _)| end)
.take(10)
.last()
.unwrap_or(line.len());
stdout.write_all(line[..end].trim_end())?;
stdout.write_all(b"\n")?;
Ok(true)
})?;
Ok(())
}
```
### Cargo features
This crate comes with a few features that control standard library, serde
and Unicode support.
* `std` - **Enabled** by default. This provides APIs that require the standard
library, such as `Vec<u8>`.
* `unicode` - **Enabled** by default. This provides APIs that require sizable
Unicode data compiled into the binary. This includes, but is not limited to,
grapheme/word/sentence segmenters. When this is disabled, basic support such
as UTF-8 decoding is still included.
* `serde1` - **Disabled** by default. Enables implementations of serde traits
for the `BStr` and `BString` types.
* `serde1-nostd` - **Disabled** by default. Enables implementations of serde
traits for the `BStr` type only, intended for use without the standard
library. Generally, you either want `serde1` or `serde1-nostd`, not both.
### Minimum Rust version policy
This crate's minimum supported `rustc` version (MSRV) is `1.28.0`.
In general, this crate will be conservative with respect to the minimum
supported version of Rust. MSRV may be bumped in minor version releases.
### Future work
Since this is meant to be a core crate, getting a `1.0` release is a priority.
My hope is to move to `1.0` within the next year and commit to its API so that
`bstr` can be used as a public dependency.
A large part of the API surface area was taken from the standard library, so
from an API design perspective, a good portion of this crate should be mature.
The main differences from the standard library are in how the various substring
search routines work. The standard library provides generic infrastructure for
supporting different types of searches with a single method, whereas this
library prefers to define new methods for each type of search and drop the
generic infrastructure.
Some _probable_ future considerations for APIs include, but are not limited to:
* A convenience layer on top of the `aho-corasick` crate.
* Unicode normalization.
* More sophisticated support for dealing with Unicode case, perhaps by
combining the use cases supported by [`caseless`](https://docs.rs/caseless)
and [`unicase`](https://docs.rs/unicase).
* Add facilities for dealing with OS strings and file paths, probably via
simple conversion routines.
Here are some examples that are _probably_ out of scope for this crate:
* Regular expressions.
* Unicode collation.
The exact scope isn't quite clear, but I expect we can iterate on it.
In general, as stated below, this crate is an experiment in bringing lots of
related APIs together into a single crate while simultaneously attempting to
keep the total number of dependencies low. Indeed, every dependency of `bstr`,
except for `memchr`, is optional.
### High level motivation
Strictly speaking, the `bstr` crate provides very little that can't already be
achieved with the standard library `Vec<u8>`/`&[u8]` APIs and the ecosystem of
library crates. For example:
* The standard library's
[`Utf8Error`](https://doc.rust-lang.org/std/str/struct.Utf8Error.html)
can be used for incremental lossy decoding of `&[u8]`.
* The
[`unicode-segmentation`](https://unicode-rs.github.io/unicode-segmentation/unicode_segmentation/index.html)
crate can be used for iterating over graphemes (or words), but is only
implemented for `&str` types. One could use `Utf8Error` above to implement
grapheme iteration with the same semantics as what `bstr` provides (automatic
Unicode replacement codepoint substitution).
* The [`twoway`](https://docs.rs/twoway) crate can be used for
fast substring searching on `&[u8]`.
So why create `bstr`? Part of the point of the `bstr` crate is to provide a
uniform API of coupled components instead of relying on users to piece together
loosely coupled components from the crate ecosystem. For example, if you wanted
to perform a search and replace in a `Vec<u8>`, then writing the code to do
that with the `twoway` crate is not that difficult, but it's still additional
glue code you have to write. This work adds up depending on what you're doing.
Consider, for example, trimming and splitting, along with their different
variants.
In other words, `bstr` is partially a way of pushing back against the
micro-crate ecosystem that appears to be evolving. It's not clear to me whether
this experiment will be successful or not, but it is definitely a goal of
`bstr` to keep its dependency list lightweight. For example, `serde` is an
optional dependency because there is no feasible alternative, but `twoway` is
not, where we instead prefer to implement our own substring search. In service
of this philosophy, currently, the only required dependency of `bstr` is
`memchr`.
### License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
https://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT](LICENSE-MIT) or
https://opensource.org/licenses/MIT)
at your option.
The data in `src/unicode/data/` is licensed under the Unicode License Agreement
([LICENSE-UNICODE](https://www.unicode.org/copyright.html#License)), although
this data is only used in tests.

View File

@ -0,0 +1,28 @@
extern crate unicode_segmentation;
use std::error::Error;
use std::io::{self, BufRead, Write};
use unicode_segmentation::UnicodeSegmentation;
/// Copies stdin to stdout line by line, keeping at most the first ten
/// extended grapheme clusters of each line and trimming trailing whitespace
/// from what is kept. Every output line is terminated with `\n`.
fn main() -> Result<(), Box<dyn Error>> {
    let handle = io::stdin();
    let mut input = handle.lock();
    let mut out = io::BufWriter::new(io::stdout());

    // One buffer reused for every line; it is cleared at the end of each pass.
    let mut buf = String::new();
    loop {
        // A read of zero bytes means the input is exhausted.
        if input.read_line(&mut buf)? == 0 {
            break;
        }

        // Byte offset just past the tenth grapheme cluster, or the whole line
        // when it has no grapheme clusters at all.
        let cut = match buf.grapheme_indices(true).take(10).last() {
            Some((start, cluster)) => start + cluster.len(),
            None => buf.len(),
        };

        {
            #[allow(deprecated)] // `trim_right` is kept for Rust 1.28.0
            let shown = buf[..cut].trim_right();
            out.write_all(shown.as_bytes())?;
        }
        out.write_all(b"\n")?;

        buf.clear();
    }

    Ok(())
}

View File

@ -0,0 +1,24 @@
extern crate bstr;
use std::error::Error;
use std::io::{self, Write};
use bstr::{io::BufReadExt, ByteSlice};
/// Copies stdin to stdout line by line using byte strings, keeping at most
/// the first ten grapheme clusters of each line and trimming trailing
/// whitespace from what is kept. Every output line is terminated with `\n`.
fn main() -> Result<(), Box<dyn Error>> {
    let input = io::stdin();
    let mut out = io::BufWriter::new(io::stdout());

    input.lock().for_byte_line_with_terminator(|raw| {
        // End offset of the tenth grapheme cluster, or the full line length
        // when the line yields no grapheme clusters.
        let cut = match raw.grapheme_indices().take(10).last() {
            Some((_, stop, _)) => stop,
            None => raw.len(),
        };

        let visible = raw[..cut].trim_end();
        out.write_all(visible)?;
        out.write_all(b"\n")?;
        Ok(true)
    })?;

    Ok(())
}

View File

@ -0,0 +1,17 @@
use std::error::Error;
use std::io::{self, BufRead, Write};
/// Echoes to stdout every stdin line that contains the substring
/// `Dimension`; all other lines are dropped.
fn main() -> Result<(), Box<dyn Error>> {
    let handle = io::stdin();
    let mut input = handle.lock();
    let mut out = io::BufWriter::new(io::stdout());

    // One buffer reused for every line; it is cleared at the end of each pass.
    let mut buf = String::new();
    loop {
        // A read of zero bytes means the input is exhausted.
        if input.read_line(&mut buf)? == 0 {
            break;
        }

        let wanted = buf.contains("Dimension");
        if wanted {
            out.write_all(buf.as_bytes())?;
        }

        buf.clear();
    }

    Ok(())
}

View File

@ -0,0 +1,19 @@
extern crate bstr;
use std::error::Error;
use std::io::{self, Write};
use bstr::{io::BufReadExt, ByteSlice};
/// Echoes to stdout every stdin line (terminator included) that contains the
/// byte substring `Dimension`; all other lines are dropped.
fn main() -> Result<(), Box<dyn Error>> {
    let input = io::stdin();
    let mut out = io::BufWriter::new(io::stdout());

    input.lock().for_byte_line_with_terminator(|raw| {
        // Lines without the marker are skipped, but iteration carries on.
        if !raw.contains_str("Dimension") {
            return Ok(true);
        }
        out.write_all(raw)?;
        Ok(true)
    })?;

    Ok(())
}

View File

@ -0,0 +1,15 @@
use std::error::Error;
use std::io::{self, BufRead, Write};
/// Copies stdin to stdout line by line, converting each line to uppercase
/// with `str::to_uppercase`.
fn main() -> Result<(), Box<dyn Error>> {
    let handle = io::stdin();
    let mut input = handle.lock();
    let mut out = io::BufWriter::new(io::stdout());

    // One buffer reused for every line; it is cleared at the end of each pass.
    let mut buf = String::new();
    loop {
        // A read of zero bytes means the input is exhausted.
        if input.read_line(&mut buf)? == 0 {
            break;
        }

        let shouted = buf.to_uppercase();
        out.write_all(shouted.as_bytes())?;

        buf.clear();
    }

    Ok(())
}

View File

@ -0,0 +1,20 @@
extern crate bstr;
use std::error::Error;
use std::io::{self, Write};
use bstr::{io::BufReadExt, ByteSlice};
/// Copies stdin to stdout line by line, uppercasing each line into a scratch
/// buffer that is reused across lines.
fn main() -> Result<(), Box<dyn Error>> {
    let input = io::stdin();
    let mut out = io::BufWriter::new(io::stdout());

    // Scratch space for the uppercased bytes; emptied before every line so
    // its allocation can be reused.
    let mut scratch = Vec::new();

    input.lock().for_byte_line_with_terminator(|raw| {
        scratch.clear();
        raw.to_uppercase_into(&mut scratch);
        out.write_all(&scratch)?;
        Ok(true)
    })?;

    Ok(())
}

View File

@ -0,0 +1,20 @@
extern crate unicode_segmentation;
use std::error::Error;
use std::io::{self, BufRead};
use unicode_segmentation::UnicodeSegmentation;
/// Counts the Unicode words on stdin, line by line, and prints the total.
fn main() -> Result<(), Box<dyn Error>> {
    let handle = io::stdin();
    let mut input = handle.lock();

    let mut total = 0;
    // One buffer reused for every line; it is cleared at the end of each pass.
    let mut buf = String::new();
    loop {
        // A read of zero bytes means the input is exhausted.
        if input.read_line(&mut buf)? == 0 {
            break;
        }

        let found = buf.unicode_words().count();
        total += found;

        buf.clear();
    }

    println!("{}", total);
    Ok(())
}

View File

@ -0,0 +1,17 @@
extern crate bstr;
use std::error::Error;
use std::io;
use bstr::{io::BufReadExt, ByteSlice};
/// Counts the words found on stdin, treating the input as raw bytes, and
/// prints the total on a single line.
///
/// # Errors
///
/// Returns any I/O error raised while reading stdin.
fn main() -> Result<(), Box<dyn Error>> {
    let stdin = io::stdin();
    let mut total = 0;

    stdin.lock().for_byte_line_with_terminator(|chunk| {
        total += chunk.words().count();
        // Returning `true` asks the iterator to keep going.
        Ok(true)
    })?;

    println!("{}", total);
    Ok(())
}

View File

@ -0,0 +1,2 @@
max_width = 79
use_small_heuristics = "max"

View File

@ -0,0 +1,149 @@
#!/bin/sh
set -e
D="$(dirname "$0")"
# Abort the script with a diagnostic on stderr unless the command named by $1
# can be found by the shell.
requires() {
  cmd="$1"
  # Found: nothing more to do.
  command -v "$cmd" > /dev/null 2>&1 && return 0
  echo "DEPENDENCY MISSING: $cmd must be installed" >&2
  exit 1
}
# Succeed (status 0) if the first argument is equal to any of the remaining
# arguments; fail (status 1) otherwise.
array_exists() {
  needle="$1"
  shift
  for el in "$@"; do
    [ "$el" = "$needle" ] && return 0
  done
  return 1
}
# Generate the forward and reverse grapheme break DFAs. Both are compiled
# from the same regex, which is printed by regex/grapheme.sh.
graphemes() {
# Capture the regex script's stdout once and reuse it for both directions.
regex="$(sh "$D/regex/grapheme.sh")"
echo "generating forward grapheme DFA"
ucd-generate dfa \
--name GRAPHEME_BREAK_FWD \
--sparse --minimize --anchored --state-size 2 \
src/unicode/fsm/ \
"$regex"
# Same regex, compiled with --reverse --longest for the reverse DFA.
echo "generating reverse grapheme DFA"
ucd-generate dfa \
--name GRAPHEME_BREAK_REV \
--reverse --longest \
--sparse --minimize --anchored --state-size 2 \
src/unicode/fsm/ \
"$regex"
}
# Generate the forward word break DFA from the regex printed by
# regex/word.sh. Only a forward DFA is generated here.
words() {
regex="$(sh "$D/regex/word.sh")"
echo "generating forward word DFA (this can take a while)"
ucd-generate dfa \
--name WORD_BREAK_FWD \
--sparse --minimize --anchored --state-size 4 \
src/unicode/fsm/ \
"$regex"
}
# Generate the forward sentence break DFA from the regex printed by
# regex/sentence.sh. Only a forward DFA is generated here.
sentences() {
regex="$(sh "$D/regex/sentence.sh")"
echo "generating forward sentence DFA (this can take a while)"
ucd-generate dfa \
--name SENTENCE_BREAK_FWD \
--minimize \
--sparse --anchored --state-size 4 \
src/unicode/fsm/ \
"$regex"
}
# Generate the reverse DFA that matches a single regional indicator
# codepoint. Invoked by the `regional-indicator` command.
regional_indicator() {
# For finding all occurrences of region indicators. This is used to handle
# regional indicators as a special case for the reverse grapheme iterator
# and the reverse word iterator.
echo "generating regional indicator DFA"
ucd-generate dfa \
--name REGIONAL_INDICATOR_REV \
--reverse \
--classes --minimize --anchored --premultiply --state-size 1 \
src/unicode/fsm/ \
"\p{gcb=Regional_Indicator}"
}
# Generate the forward DFA for the `\w` character class. Unlike the other
# DFAs in this script, it is built without --anchored. Invoked by the
# `simple-word` command.
simple_word() {
echo "generating forward simple word DFA"
ucd-generate dfa \
--name SIMPLE_WORD_FWD \
--sparse --minimize --state-size 2 \
src/unicode/fsm/ \
"\w"
}
# Generate the anchored forward and reverse DFAs matching one or more
# whitespace characters (`\s+`).
whitespace() {
echo "generating forward whitespace DFA"
ucd-generate dfa \
--name WHITESPACE_ANCHORED_FWD \
--anchored --classes --premultiply --minimize --state-size 1 \
src/unicode/fsm/ \
"\s+"
# Same pattern and flags, plus --reverse.
echo "generating reverse whitespace DFA"
ucd-generate dfa \
--name WHITESPACE_ANCHORED_REV \
--reverse \
--anchored --classes --premultiply --minimize --state-size 1 \
src/unicode/fsm/ \
"\s+"
}
# Entry point: parse the command line and run the requested generators.
#
# With no arguments, or when "all" is given, every command in the list below
# is run. Otherwise each argument is treated as a command name.
main() {
# Help is printed to stderr and the script exits without doing any work.
if array_exists "-h" "$@" || array_exists "--help" "$@"; then
echo "Usage: $(basename "$0") [--list-commands] [<command>] ..." >&2
exit
fi
# Whitespace-separated list of the supported command names. Names use
# dashes; they are mapped to function names (with underscores) below.
commands="
graphemes
sentences
words
regional-indicator
simple-word
whitespace
"
if array_exists "--list-commands" "$@"; then
for cmd in $commands; do
echo "$cmd"
done
exit
fi
# ucd-generate is used to compile regexes into DFAs.
requires ucd-generate
# Every generator passes this directory to ucd-generate, so make sure it
# exists.
mkdir -p src/unicode/fsm/
cmds=$*
if [ $# -eq 0 ] || array_exists "all" "$@"; then
cmds=$commands
fi
# $cmds and $commands are intentionally unquoted so the shell splits them
# into individual words.
for cmd in $cmds; do
if array_exists "$cmd" $commands; then
# Translate the command name into its function name and call it.
fun="$(echo "$cmd" | sed 's/-/_/g')"
eval "$fun"
else
# Unknown names are reported but do not stop the remaining commands.
echo "unrecognized command: $cmd" >&2
fi
done
}
main "$@"

View File

@ -0,0 +1,50 @@
#!/bin/sh
# vim: indentexpr= nosmartindent autoindent
# vim: tabstop=2 shiftwidth=2 softtabstop=2
# This regex was manually written, derived from the rules in UAX #29.
# Particularly, from Table 1c, which lays out a regex for grapheme clusters.
# Shorthand names for the Unicode property classes used by the regex below.
# Most select a value of the `gcb` property; `Any` and `ExtendPict` are
# standalone classes.
CR="\p{gcb=CR}"
LF="\p{gcb=LF}"
Control="\p{gcb=Control}"
Prepend="\p{gcb=Prepend}"
L="\p{gcb=L}"
V="\p{gcb=V}"
LV="\p{gcb=LV}"
LVT="\p{gcb=LVT}"
T="\p{gcb=T}"
RI="\p{gcb=RI}"
Extend="\p{gcb=Extend}"
ZWJ="\p{gcb=ZWJ}"
SpacingMark="\p{gcb=SpacingMark}"
Any="\p{any}"
ExtendPict="\p{Extended_Pictographic}"
# Print the regex on stdout. The shell expands the $Name variables inside the
# double-quoted string, and the leading (?x) flag makes the regex engine
# ignore the layout whitespace.
echo "(?x)
$CR $LF
|
$Control
|
$Prepend*
(
(
($L* ($V+ | $LV $V* | $LVT) $T*)
|
$L+
|
$T+
)
|
$RI $RI
|
$ExtendPict ($Extend* $ZWJ $ExtendPict)*
|
[^$Control $CR $LF]
)
[$Extend $ZWJ $SpacingMark]*
|
$Any
"

View File

@ -0,0 +1,176 @@
#!/bin/sh
# vim: indentexpr= nosmartindent autoindent
# vim: tabstop=2 shiftwidth=2 softtabstop=2
# This is a regex that I reverse engineered from the sentence boundary chain
# rules in UAX #29. Unlike the grapheme regex, which is essentially provided
# for us in UAX #29, no such sentence regex exists.
#
# I looked into how ICU achieves this, since UAX #29 hints that producing
# finite state machines for grapheme/sentence/word/line breaking is possible,
# but only easy to do for graphemes. ICU does this by implementing their own
# DSL for describing the break algorithms in terms of the chaining rules
# directly. You can see an example for sentences in
# icu4c/source/data/brkitr/rules/sent.txt. ICU then builds a finite state
# machine from those rules in a mostly standard way, but implements the
# "chaining" aspect of the rules by connecting overlapping end and start
# states. For example, given SB7:
#
# (Upper | Lower) ATerm x Upper
#
# Then the naive way to convert this into a regex would be something like
#
# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper}
#
# Unfortunately, this is incorrect. Why? Well, consider an example like so:
#
# U.S.A.
#
# A correct implementation of the sentence breaking algorithm should not insert
# any breaks here, exactly in accordance with repeatedly applying rule SB7 as
# given above. Our regex fails to do this because it will first match `U.S`
# without breaking them---which is correct---but will then start looking for
# its next rule beginning with a full stop (in ATerm) and followed by an
# uppercase letter (A). This will wind up triggering rule SB11 (without
# matching `A`), which inserts a break.
#
# The reason why this happens is because our initial application of rule SB7
# "consumes" the next uppercase letter (S), which we want to reuse as a prefix
# in the next rule application. A natural way to express this would be with
# look-around, although it's not clear that works in every case since you
# ultimately might want to consume that ending uppercase letter. In any case,
# we can't use look-around in our truly regular regexes, so we must fix this.
# The approach we take is to explicitly repeat rules when a suffix of a rule
# is a prefix of another rule. In the case of SB7, the end of the rule, an
# uppercase letter, also happens to match the beginning of the rule. This can
# in turn be repeated indefinitely. Thus, our actual translation to a regex is:
#
# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper}(\p{sb=ATerm}\p{sb=Upper})*
#
# It turns out that this is exactly what ICU does, but in their case, they do
# it automatically. In our case, we connect the chaining rules manually. It's
# tedious. With that said, we do not implement Unicode line breaking with this
# approach, which is a far scarier beast. In that case, it would probably be
# worth writing the code to do what ICU does.
#
# In the case of sentence breaks, there aren't *too* many overlaps of this
# nature. We list them out exhaustively to make this clear, because it's
# essentially impossible to easily observe this in the regex. (It took me a
# full day to figure all of this out.) Rules marked with N/A mean that they
# specify a break, and this strategy only really applies to stringing together
# non-breaks.
#
# SB1 - N/A
# SB2 - N/A
# SB3 - None
# SB4 - N/A
# SB5 - None
# SB6 - None
# SB7 - End overlaps with beginning of SB7
# SB8 - End overlaps with beginning of SB7
# SB8a - End overlaps with beginning of SB6, SB8, SB8a, SB9, SB10, SB11
# SB9 - None
# SB10 - None
# SB11 - None
# SB998 - N/A
#
# SB8a is in particular quite tricky to get right without look-ahead, since it
# allows ping-ponging between match rules SB8a and SB9-11, where SB9-11
# otherwise indicate that a break has been found. In the regex below, we tackle
# this by only permitting part of SB8a to match inside our core non-breaking
# repetition. In particular, we only allow the parts of SB8a to match that
# permit the non-breaking components to continue. If a part of SB8a matches
# that guarantees a pop out to SB9-11, (like `STerm STerm`), then we let it
# happen. This still isn't correct because an SContinue might be seen which
# would allow moving back into SB998 and thus the non-breaking repetition, so
# we handle that case as well.
#
# Finally, the last complication here is the sprinkling of $Ex* everywhere.
# This essentially corresponds to the implementation of SB5 by following
# UAX #29's recommendation in S6.2. Essentially, we use it to avoid ever breaking
# in the middle of a grapheme cluster.
# Shorthand names for the values of the `sb` property used by the regex
# below.
CR="\p{sb=CR}"
LF="\p{sb=LF}"
Sep="\p{sb=Sep}"
Close="\p{sb=Close}"
Sp="\p{sb=Sp}"
STerm="\p{sb=STerm}"
ATerm="\p{sb=ATerm}"
SContinue="\p{sb=SContinue}"
Numeric="\p{sb=Numeric}"
Upper="\p{sb=Upper}"
Lower="\p{sb=Lower}"
OLetter="\p{sb=OLetter}"
# Composite classes built from the shorthands above.
Ex="[\p{sb=Extend}\p{sb=Format}]"
ParaSep="[$Sep $CR $LF]"
SATerm="[$STerm $ATerm]"
LetterSepTerm="[$OLetter $Upper $Lower $ParaSep $SATerm]"
# Print the regex on stdout. The shell expands the $Name variables inside the
# double-quoted string; the `#` lines within it are part of the printed
# regex text and are treated as comments by the (?x) verbose mode.
echo "(?x)
(
# SB6
$ATerm $Ex*
$Numeric
|
# SB7
[$Upper $Lower] $Ex* $ATerm $Ex*
$Upper $Ex*
# overlap with SB7
($ATerm $Ex* $Upper $Ex*)*
|
# SB8
$ATerm $Ex* $Close* $Ex* $Sp* $Ex*
([^$LetterSepTerm] $Ex*)* $Lower $Ex*
# overlap with SB7
($ATerm $Ex* $Upper $Ex*)*
|
# SB8a
$SATerm $Ex* $Close* $Ex* $Sp* $Ex*
(
$SContinue
|
$ATerm $Ex*
# Permit repetition of SB8a
(($Close $Ex*)* ($Sp $Ex*)* $SATerm)*
# In order to continue non-breaking matching, we now must observe
# a match with a rule that keeps us in SB6-8a. Otherwise, we've entered
# one of SB9-11 and know that a break must follow.
(
# overlap with SB6
$Numeric
|
# overlap with SB8
($Close $Ex*)* ($Sp $Ex*)*
([^$LetterSepTerm] $Ex*)* $Lower $Ex*
# overlap with SB7
($ATerm $Ex* $Upper $Ex*)*
|
# overlap with SB8a
($Close $Ex*)* ($Sp $Ex*)* $SContinue
)
|
$STerm $Ex*
# Permit repetition of SB8a
(($Close $Ex*)* ($Sp $Ex*)* $SATerm)*
# As with ATerm above, in order to continue non-breaking matching, we
# must now observe a match with a rule that keeps us out of SB9-11.
# For STerm, the only such possibility is to see an SContinue. Anything
# else will result in a break.
($Close $Ex*)* ($Sp $Ex*)* $SContinue
)
|
# SB998
# The logic behind this catch-all is that if we get to this point and
# see a Sep, CR, LF, STerm or ATerm, then it has to fall into one of
# SB9, SB10 or SB11. In the cases of SB9-11, we always find a break since
# SB11 acts as a catch-all to induce a break following a SATerm that isn't
# handled by rules SB6-SB8a.
[^$ParaSep $SATerm]
)*
# The following collapses rules SB3, SB4, part of SB8a, SB9, SB10 and SB11.
($SATerm $Ex* ($Close $Ex*)* ($Sp $Ex*)*)* ($CR $LF | $ParaSep)?
"

View File

@ -0,0 +1,111 @@
#!/bin/sh
# vim: indentexpr= nosmartindent autoindent
# vim: tabstop=2 shiftwidth=2 softtabstop=2
# See the comments in regex/sentence.sh for the general approach to how this
# regex was written.
#
# Writing the regex for this was *hard*. It took me two days of hacking to get
# this far, and that was after I had finished the sentence regex, so my brain
# was fully cached on this. Unlike the sentence regex, the rules in the regex
# below don't correspond as nicely to the rules in UAX #29. In particular, the
# UAX #29 rules have a ton of overlap with each other, which requires crazy
# stuff in the regex. I'm not even sure the regex below is 100% correct or even
# minimal, however, I did compare this with the ICU word segmenter on a few
# different corpora, and it produces identical results. (In addition to of
# course passing the UCD tests.)
#
# In general, I consider this approach to be a failure. Firstly, this is
# clearly a write-only regex. Secondly, building the minimized DFA for this is
# incredibly slow. Thirdly, the DFA is itself very large (~240KB). Fourthly,
# reversing this regex (for reverse word iteration) results in a >19MB DFA.
# Yes. That's MB. Wat. And it took 5 minutes to build.
#
# I think we might consider changing our approach to this problem. The normal
# path I've seen, I think, is to decode codepoints one at a time, and then
# thread them through a state machine in the code itself. We could take this
# approach, or possibly combine it with a DFA that tells us which Word_Break
# value a codepoint has. I'd prefer the latter approach, but it requires adding
# RegexSet support to regex-automata. Something that should definitely be done,
# but is a fair amount of work.
#
# Gah.
# Shorthand names for the Unicode property classes used by the regex below.
# Most select a value of the `wb` property; `Any` and `ExtendPict` are
# standalone classes.
CR="\p{wb=CR}"
LF="\p{wb=LF}"
Newline="\p{wb=Newline}"
ZWJ="\p{wb=ZWJ}"
RI="\p{wb=Regional_Indicator}"
Katakana="\p{wb=Katakana}"
HebrewLet="\p{wb=HebrewLetter}"
ALetter="\p{wb=ALetter}"
SingleQuote="\p{wb=SingleQuote}"
DoubleQuote="\p{wb=DoubleQuote}"
MidNumLet="\p{wb=MidNumLet}"
MidLetter="\p{wb=MidLetter}"
MidNum="\p{wb=MidNum}"
Numeric="\p{wb=Numeric}"
ExtendNumLet="\p{wb=ExtendNumLet}"
WSegSpace="\p{wb=WSegSpace}"
Any="\p{any}"
Ex="[\p{wb=Extend} \p{wb=Format} $ZWJ]"
ExtendPict="\p{Extended_Pictographic}"
# Composite classes and reusable sub-patterns built from the shorthands
# above. The two *Repeat patterns match a run of letters (or digits) joined
# by the permitted mid-word (or mid-number) characters.
AHLetter="[$ALetter $HebrewLet]"
MidNumLetQ="[$MidNumLet $SingleQuote]"
AHLetterRepeat="$AHLetter $Ex* ([$MidLetter $MidNumLetQ] $Ex* $AHLetter $Ex*)*"
NumericRepeat="$Numeric $Ex* ([$MidNum $MidNumLetQ] $Ex* $Numeric $Ex*)*"
# Print the regex on stdout. The shell expands the $Name variables inside the
# double-quoted string, and the leading (?x) flag makes the regex engine
# ignore the layout whitespace.
echo "(?x)
$CR $LF
|
[$Newline $CR $LF]
|
$WSegSpace $WSegSpace+
|
(
([^$Newline $CR $LF]? $Ex* $ZWJ $ExtendPict $Ex*)+
|
($ExtendNumLet $Ex*)* $AHLetter $Ex*
(
(
($NumericRepeat | $ExtendNumLet $Ex*)*
|
[$MidLetter $MidNumLetQ] $Ex*
)
$AHLetter $Ex*
)+
($NumericRepeat | $ExtendNumLet $Ex*)*
|
($ExtendNumLet $Ex*)* $AHLetter $Ex* ($NumericRepeat | $ExtendNumLet $Ex*)+
|
($ExtendNumLet $Ex*)* $Numeric $Ex*
(
(
($AHLetterRepeat | $ExtendNumLet $Ex*)*
|
[$MidNum $MidNumLetQ] $Ex*
)
$Numeric $Ex*
)+
($AHLetterRepeat | $ExtendNumLet $Ex*)*
|
($ExtendNumLet $Ex*)* $Numeric $Ex* ($AHLetterRepeat | $ExtendNumLet $Ex*)+
|
$Katakana $Ex*
(($Katakana | $ExtendNumLet) $Ex*)+
|
$ExtendNumLet $Ex*
(($ExtendNumLet | $AHLetter | $Numeric | $Katakana) $Ex*)+
)+
|
$HebrewLet $Ex* $SingleQuote $Ex*
|
($HebrewLet $Ex* $DoubleQuote $Ex*)+ $HebrewLet $Ex*
|
$RI $Ex* $RI $Ex*
|
$Any $Ex*
"

View File

@ -0,0 +1,336 @@
use core::mem;
// The following ~400 lines of code exist for exactly one purpose, which is
// to optimize this code:
//
// byte_slice.iter().position(|&b| b > 0x7F).unwrap_or(byte_slice.len())
//
// Yes... Overengineered is a word that comes to mind, but this is effectively
// a very similar problem to memchr, and virtually nobody has been able to
// resist optimizing the crap out of that (except for perhaps the BSD and MUSL
// folks). In particular, this routine makes a very common case (ASCII) very
// fast, which seems worth it. We do stop short of adding AVX variants of the
// code below in order to retain our sanity and also to avoid needing to deal
// with runtime target feature detection. RESIST!
//
// In order to understand the SIMD version below, it would be good to read this
// comment describing how my memchr routine works:
// https://github.com/BurntSushi/rust-memchr/blob/b0a29f267f4a7fad8ffcc8fe8377a06498202883/src/x86/sse2.rs#L19-L106
//
// The primary difference with memchr is that for ASCII, we can do a bit less
// work. In particular, we don't need to detect the presence of a specific
// byte, but rather, whether any byte has its most significant bit set. That
// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to
// _mm_movemask_epi8.
// Size of a machine word in bytes. The fallback routine examines the slice
// one word at a time.
#[cfg(any(test, not(target_arch = "x86_64")))]
const USIZE_BYTES: usize = mem::size_of::<usize>();
// Number of bytes consumed per iteration of the fallback's unrolled loop
// (two words).
#[cfg(any(test, not(target_arch = "x86_64")))]
const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES;
// This is a mask where the most significant bit of each byte in the usize
// is set. We test this bit to determine whether a character is ASCII or not.
// Namely, a single byte is regarded as an ASCII codepoint if and only if its
// most significant bit is not set.
#[cfg(any(test, not(target_arch = "x86_64")))]
const ASCII_MASK_U64: u64 = 0x8080808080808080;
// The mask at native word width; on targets where usize is narrower than
// 64 bits the cast keeps only the low-order bytes.
#[cfg(any(test, not(target_arch = "x86_64")))]
const ASCII_MASK: usize = ASCII_MASK_U64 as usize;
/// Finds the position of the first byte in `slice` that is not ASCII.
///
/// When every byte is ASCII (including when `slice` is empty), the slice's
/// length is returned instead.
pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
    // Exactly one of the two statements below survives `cfg` stripping.
    #[cfg(target_arch = "x86_64")]
    return first_non_ascii_byte_sse2(slice);

    #[cfg(not(target_arch = "x86_64"))]
    return first_non_ascii_byte_fallback(slice);
}
/// Portable (non-SIMD) search for the first non-ASCII byte, examining the
/// slice one `usize` word at a time.
///
/// Returns the index of the first byte whose most significant bit is set, or
/// `slice.len()` if there is no such byte.
#[cfg(any(test, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
// Mask selecting the offset of an address within a word.
let align = USIZE_BYTES - 1;
let start_ptr = slice.as_ptr();
// One-past-the-end pointer, taken from the empty tail of the slice.
let end_ptr = slice[slice.len()..].as_ptr();
let mut ptr = start_ptr;
unsafe {
// Too short for even a single word-sized read: go byte by byte.
if slice.len() < USIZE_BYTES {
return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
}
// Check the first word with an unaligned read. This covers every byte
// skipped by the alignment step below.
let chunk = read_unaligned_usize(ptr);
let mask = chunk & ASCII_MASK;
if mask != 0 {
return first_non_ascii_byte_mask(mask);
}
// Advance to the next word-aligned address. This moves forward by
// between 1 and USIZE_BYTES bytes, all of which were just checked.
ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
debug_assert!(ptr > start_ptr);
debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
// The length guard ensures `end_ptr - FALLBACK_LOOP_SIZE` does not
// point before the start of the slice.
if slice.len() >= FALLBACK_LOOP_SIZE {
// Unrolled loop: two aligned words per iteration.
while ptr <= ptr_sub(end_ptr, FALLBACK_LOOP_SIZE) {
debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
let a = *(ptr as *const usize);
let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
if (a | b) & ASCII_MASK != 0 {
// What a kludge. We wrap the position finding code into
// a non-inlineable function, which makes the codegen in
// the tight loop above a bit better by avoiding a
// couple extra movs. We pay for it by two additional
// stores, but only in the case of finding a non-ASCII
// byte.
#[inline(never)]
unsafe fn findpos(
start_ptr: *const u8,
ptr: *const u8,
) -> usize {
// Re-read both words and report the position within
// whichever one contains the first non-ASCII byte.
let a = *(ptr as *const usize);
let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
let mut at = sub(ptr, start_ptr);
let maska = a & ASCII_MASK;
if maska != 0 {
return at + first_non_ascii_byte_mask(maska);
}
at += USIZE_BYTES;
let maskb = b & ASCII_MASK;
debug_assert!(maskb != 0);
return at + first_non_ascii_byte_mask(maskb);
}
return findpos(start_ptr, ptr);
}
ptr = ptr_add(ptr, FALLBACK_LOOP_SIZE);
}
}
// Whatever remains (fewer than FALLBACK_LOOP_SIZE bytes when the loop
// ran) is checked one byte at a time.
first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
}
}
/// SSE2 search for the first non-ASCII byte, examining the slice one
/// `__m128i` vector at a time.
///
/// Returns the index of the first byte whose most significant bit is set, or
/// `slice.len()` if there is no such byte.
#[cfg(target_arch = "x86_64")]
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
use core::arch::x86_64::*;
const VECTOR_SIZE: usize = mem::size_of::<__m128i>();
// Mask selecting the offset of an address within a vector.
const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
// The main loop is unrolled to handle four vectors per iteration.
const VECTOR_LOOP_SIZE: usize = 4 * VECTOR_SIZE;
let start_ptr = slice.as_ptr();
// One-past-the-end pointer, taken from the empty tail of the slice.
let end_ptr = slice[slice.len()..].as_ptr();
let mut ptr = start_ptr;
unsafe {
// Too short for even a single vector load: go byte by byte.
if slice.len() < VECTOR_SIZE {
return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
}
// Check the first vector with an unaligned load. The movemask collects
// the most significant bit of every byte, so a non-zero mask means a
// non-ASCII byte and its lowest set bit gives that byte's offset.
let chunk = _mm_loadu_si128(ptr as *const __m128i);
let mask = _mm_movemask_epi8(chunk);
if mask != 0 {
return mask.trailing_zeros() as usize;
}
// Advance to the next vector-aligned address. This moves forward by
// between 1 and VECTOR_SIZE bytes, all of which were just checked.
ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
debug_assert!(ptr > start_ptr);
debug_assert!(end_ptr.sub(VECTOR_SIZE) >= start_ptr);
// The length guard ensures `end_ptr - VECTOR_LOOP_SIZE` does not point
// before the start of the slice.
if slice.len() >= VECTOR_LOOP_SIZE {
while ptr <= ptr_sub(end_ptr, VECTOR_LOOP_SIZE) {
debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
let a = _mm_load_si128(ptr as *const __m128i);
let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
let c =
_mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
let d =
_mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
// OR the four vectors together so a single movemask tells us
// whether any of them contains a non-ASCII byte.
let or1 = _mm_or_si128(a, b);
let or2 = _mm_or_si128(c, d);
let or3 = _mm_or_si128(or1, or2);
if _mm_movemask_epi8(or3) != 0 {
// Something matched: inspect the vectors in order to find
// the first offending byte.
let mut at = sub(ptr, start_ptr);
let mask = _mm_movemask_epi8(a);
if mask != 0 {
return at + mask.trailing_zeros() as usize;
}
at += VECTOR_SIZE;
let mask = _mm_movemask_epi8(b);
if mask != 0 {
return at + mask.trailing_zeros() as usize;
}
at += VECTOR_SIZE;
let mask = _mm_movemask_epi8(c);
if mask != 0 {
return at + mask.trailing_zeros() as usize;
}
at += VECTOR_SIZE;
let mask = _mm_movemask_epi8(d);
debug_assert!(mask != 0);
return at + mask.trailing_zeros() as usize;
}
ptr = ptr_add(ptr, VECTOR_LOOP_SIZE);
}
}
// Handle the remaining whole vectors one at a time.
while ptr <= end_ptr.sub(VECTOR_SIZE) {
debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
let chunk = _mm_loadu_si128(ptr as *const __m128i);
let mask = _mm_movemask_epi8(chunk);
if mask != 0 {
return sub(ptr, start_ptr) + mask.trailing_zeros() as usize;
}
ptr = ptr.add(VECTOR_SIZE);
}
// Fewer than VECTOR_SIZE bytes remain: finish byte by byte.
first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
}
}
/// Byte-at-a-time scan of `[ptr, end_ptr)` for the first byte above 0x7F.
///
/// The returned index is relative to `start_ptr`. If no such byte is found,
/// the distance from `start_ptr` to `end_ptr` is returned.
///
/// # Safety
///
/// Every address in `[ptr, end_ptr)` is dereferenced, so that range must be
/// readable, and `start_ptr <= ptr <= end_ptr` must hold.
#[inline(always)]
unsafe fn first_non_ascii_byte_slow(
    start_ptr: *const u8,
    end_ptr: *const u8,
    mut ptr: *const u8,
) -> usize {
    debug_assert!(start_ptr <= ptr);
    debug_assert!(ptr <= end_ptr);

    loop {
        if ptr >= end_ptr {
            // Ran off the end without seeing a non-ASCII byte.
            return sub(end_ptr, start_ptr);
        }
        if *ptr > 0x7F {
            return sub(ptr, start_ptr);
        }
        ptr = ptr.add(1);
    }
}
/// Compute the position of the first non-ASCII byte described by the given
/// mask.
///
/// The mask should be computed by `chunk & ASCII_MASK`, where `chunk` is one
/// word of contiguous bytes read from the slice being checked and *at least*
/// one of those bytes is not an ASCII byte.
///
/// The position returned is the index, in memory order, of the first byte of
/// the word whose most significant bit is set.
#[cfg(any(test, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_mask(mask: usize) -> usize {
    // The first byte in memory is the least significant byte on
    // little-endian targets and the most significant one on big-endian
    // targets, so count zero bits from the matching end.
    let zero_bits = if cfg!(target_endian = "little") {
        mask.trailing_zeros()
    } else {
        mask.leading_zeros()
    };
    zero_bits as usize / 8
}
/// Move `ptr` forward by `amt` bytes.
///
/// # Safety
///
/// Same requirements as the pointer `add` method: the result must stay
/// within (or one past the end of) the allocation `ptr` points into.
unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
    debug_assert!(amt < ::core::isize::MAX as usize);
    ptr.add(amt)
}
/// Move `ptr` backward by `amt` bytes.
///
/// # Safety
///
/// Same requirements as the pointer `sub` method: the result must stay
/// within the allocation `ptr` points into.
unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
    debug_assert!(amt < ::core::isize::MAX as usize);
    ptr.sub(amt)
}
/// Read one native-endian `usize` starting at `ptr`, which need not be
/// aligned.
///
/// # Safety
///
/// `ptr` must be valid for reading `size_of::<usize>()` bytes.
#[cfg(any(test, not(target_arch = "x86_64")))]
unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
    let word_ptr = ptr as *const usize;
    word_ptr.read_unaligned()
}
/// Return the distance in bytes from `b` up to `a`.
///
/// `a` must not be less than `b`; this is checked in debug builds only.
fn sub(a: *const u8, b: *const u8) -> usize {
    let (hi, lo) = (a as usize, b as usize);
    debug_assert!(hi >= lo);
    hi - lo
}
#[cfg(test)]
mod tests {
use super::*;
// Our testing approach here is to try and exhaustively test every case.
// This includes the position at which a non-ASCII byte occurs in addition
// to the alignment of the slice that we're searching.
// "Positive" tests use all-ASCII input, so the expected answer is the
// length of the input.
#[test]
fn positive_fallback_forward() {
for i in 0..517 {
let s = "a".repeat(i);
assert_eq!(
i,
first_non_ascii_byte_fallback(s.as_bytes()),
"i: {:?}, len: {:?}, s: {:?}",
i,
s.len(),
s
);
}
}
#[test]
#[cfg(target_arch = "x86_64")]
fn positive_sse2_forward() {
for i in 0..517 {
let b = "a".repeat(i).into_bytes();
assert_eq!(b.len(), first_non_ascii_byte_sse2(&b));
}
}
// "Negative" tests append non-ASCII text after `i` ASCII bytes and then
// drop the first `align` bytes, which varies the starting address of the
// searched slice. The expected answer is the number of ASCII bytes left.
#[test]
fn negative_fallback_forward() {
for i in 0..517 {
for align in 0..65 {
let mut s = "a".repeat(i);
s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
// `get` yields None when `align` is past the end or not on a
// char boundary; such cases search the empty string.
let s = s.get(align..).unwrap_or("");
assert_eq!(
i.saturating_sub(align),
first_non_ascii_byte_fallback(s.as_bytes()),
"i: {:?}, align: {:?}, len: {:?}, s: {:?}",
i,
align,
s.len(),
s
);
}
}
}
#[test]
#[cfg(target_arch = "x86_64")]
fn negative_sse2_forward() {
for i in 0..517 {
for align in 0..65 {
let mut s = "a".repeat(i);
s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
// `get` yields None when `align` is past the end or not on a
// char boundary; such cases search the empty string.
let s = s.get(align..).unwrap_or("");
assert_eq!(
i.saturating_sub(align),
first_non_ascii_byte_sse2(s.as_bytes()),
"i: {:?}, align: {:?}, len: {:?}, s: {:?}",
i,
align,
s.len(),
s
);
}
}
}
}

View File

@ -0,0 +1,74 @@
use core::mem;
/// A wrapper for `&[u8]` that provides convenient string oriented trait impls.
///
/// If you need ownership or a growable byte string buffer, then use
/// [`BString`](struct.BString.html).
///
/// Using a `&BStr` is just like using a `&[u8]`, since `BStr`
/// implements `Deref` to `[u8]`. So all methods available on `[u8]`
/// are also available on `BStr`.
///
/// # Representation
///
/// A `&BStr` has the same representation as a `&str`. That is, a `&BStr` is
/// a fat pointer which consists of a pointer to some bytes and a length.
///
/// # Trait implementations
///
/// The `BStr` type has a number of trait implementations, and in particular,
/// defines equality and ordinal comparisons between `&BStr`, `&str` and
/// `&[u8]` for convenience.
///
/// The `Debug` implementation for `BStr` shows its bytes as a normal string.
/// For invalid UTF-8, hex escape sequences are used.
///
/// The `Display` implementation behaves as if `BStr` were first lossily
/// converted to a `str`. Invalid UTF-8 bytes are substituted with the Unicode
/// replacement codepoint, which looks like this: <20>.
#[derive(Hash)]
#[repr(transparent)]
pub struct BStr {
pub(crate) bytes: [u8],
}
impl BStr {
    /// Borrow anything that can be viewed as `[u8]` as a `BStr`.
    #[inline]
    pub(crate) fn new<B>(bytes: &B) -> &BStr
    where
        B: ?Sized + AsRef<[u8]>,
    {
        let slice: &[u8] = bytes.as_ref();
        BStr::from_bytes(slice)
    }

    /// Mutably borrow anything that can be viewed as `[u8]` as a `BStr`.
    #[inline]
    pub(crate) fn new_mut<B>(bytes: &mut B) -> &mut BStr
    where
        B: ?Sized + AsMut<[u8]>,
    {
        let slice: &mut [u8] = bytes.as_mut();
        BStr::from_bytes_mut(slice)
    }

    /// Reinterpret a byte slice as a `BStr` without copying.
    #[inline]
    pub(crate) fn from_bytes(slice: &[u8]) -> &BStr {
        // SAFETY: `BStr` is `repr(transparent)` over `[u8]`, so both
        // reference types have the same layout.
        unsafe { mem::transmute::<&[u8], &BStr>(slice) }
    }

    /// Reinterpret a mutable byte slice as a `BStr` without copying.
    #[inline]
    pub(crate) fn from_bytes_mut(slice: &mut [u8]) -> &mut BStr {
        // SAFETY: `BStr` is `repr(transparent)` over `[u8]`, so both
        // reference types have the same layout.
        unsafe { mem::transmute::<&mut [u8], &mut BStr>(slice) }
    }

    /// Convert a boxed byte slice into a boxed `BStr`, reusing the
    /// allocation.
    #[inline]
    #[cfg(feature = "std")]
    pub(crate) fn from_boxed_bytes(slice: Box<[u8]>) -> Box<BStr> {
        let raw = Box::into_raw(slice) as *mut BStr;
        // SAFETY: `raw` came from `Box::into_raw` and `BStr` is
        // `repr(transparent)` over `[u8]`.
        unsafe { Box::from_raw(raw) }
    }

    /// Convert a boxed `BStr` back into a boxed byte slice, reusing the
    /// allocation.
    #[inline]
    #[cfg(feature = "std")]
    pub(crate) fn into_boxed_bytes(slice: Box<BStr>) -> Box<[u8]> {
        let raw = Box::into_raw(slice) as *mut [u8];
        // SAFETY: `raw` came from `Box::into_raw` and `BStr` is
        // `repr(transparent)` over `[u8]`.
        unsafe { Box::from_raw(raw) }
    }

    /// View this byte string as a plain byte slice.
    #[inline]
    pub(crate) fn as_bytes(&self) -> &[u8] {
        &self.bytes
    }
}

View File

@ -0,0 +1,59 @@
use bstr::BStr;
/// A wrapper for `Vec<u8>` that provides convenient string oriented trait
/// impls.
///
/// A `BString` has ownership over its contents and corresponds to
/// a growable or shrinkable buffer. Its borrowed counterpart is a
/// [`BStr`](struct.BStr.html), called a byte string slice.
///
/// Using a `BString` is just like using a `Vec<u8>`, since `BString`
/// implements `Deref` to `Vec<u8>`. So all methods available on `Vec<u8>`
/// are also available on `BString`.
///
/// # Examples
///
/// You can create a new `BString` from a `Vec<u8>` via a `From` impl:
///
/// ```
/// use bstr::BString;
///
/// let s = BString::from("Hello, world!");
/// ```
///
/// # Deref
///
/// The `BString` type implements `Deref` and `DerefMut`, where the target
/// types are `&Vec<u8>` and `&mut Vec<u8>`, respectively. `Deref` permits all of the
/// methods defined on `Vec<u8>` to be implicitly callable on any `BString`.
///
/// For more information about how deref works, see the documentation for the
/// [`std::ops::Deref`](https://doc.rust-lang.org/std/ops/trait.Deref.html)
/// trait.
///
/// # Representation
///
/// A `BString` has the same representation as a `Vec<u8>` and a `String`.
/// That is, it is made up of three word sized components: a pointer to a
/// region of memory containing the bytes, a length and a capacity.
#[derive(Clone, Hash)]
pub struct BString {
// The owned byte buffer. It is the struct's only field.
pub(crate) bytes: Vec<u8>,
}
impl BString {
    /// View the contents as a plain byte slice.
    #[inline]
    pub(crate) fn as_bytes(&self) -> &[u8] {
        self.bytes.as_slice()
    }

    /// Borrow the contents as a `BStr`.
    #[inline]
    pub(crate) fn as_bstr(&self) -> &BStr {
        let bytes = &self.bytes;
        BStr::new(bytes)
    }

    /// Mutably borrow the contents as a `BStr`.
    #[inline]
    pub(crate) fn as_mut_bstr(&mut self) -> &mut BStr {
        let bytes = &mut self.bytes;
        BStr::new_mut(bytes)
    }
}

View File

@ -0,0 +1,115 @@
use memchr::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
mod scalar;
/// Build a 256-entry membership table for `byteset`: entry `b` is 1 when the
/// byte `b` occurs in `byteset` and 0 otherwise.
#[inline]
fn build_table(byteset: &[u8]) -> [u8; 256] {
    let mut table = [0u8; 256];
    byteset.iter().for_each(|&byte| table[usize::from(byte)] = 1);
    table
}
/// Return the index of the first byte in `haystack` that is a member of
/// `byteset`, or `None` if there is no such byte (always the case when
/// `byteset` is empty).
///
/// Sets of one to three bytes are delegated to the `memchr` family; larger
/// sets use a lookup table.
#[inline]
pub(crate) fn find(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
    match *byteset {
        [] => None,
        [n1] => memchr(n1, haystack),
        [n1, n2] => memchr2(n1, n2, haystack),
        [n1, n2, n3] => memchr3(n1, n2, n3, haystack),
        _ => {
            let table = build_table(byteset);
            scalar::forward_search_bytes(haystack, |b| table[b as usize] != 0)
        }
    }
}
/// Return the index of the last byte in `haystack` that is a member of
/// `byteset`, or `None` if there is no such byte (always the case when
/// `byteset` is empty).
///
/// Sets of one to three bytes are delegated to the `memrchr` family; larger
/// sets use a lookup table.
#[inline]
pub(crate) fn rfind(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
    match *byteset {
        [] => None,
        [n1] => memrchr(n1, haystack),
        [n1, n2] => memrchr2(n1, n2, haystack),
        [n1, n2, n3] => memrchr3(n1, n2, n3, haystack),
        _ => {
            let table = build_table(byteset);
            scalar::reverse_search_bytes(haystack, |b| table[b as usize] != 0)
        }
    }
}
/// Return the index of the first byte in `haystack` that is NOT a member of
/// `byteset`, or `None` if every byte is a member (or `haystack` is empty).
///
/// With an empty `byteset`, the first byte of a non-empty haystack always
/// qualifies.
#[inline]
pub(crate) fn find_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
    if haystack.is_empty() {
        return None;
    }
    match *byteset {
        [] => Some(0),
        [n1] => scalar::inv_memchr(n1, haystack),
        [n1, n2] => {
            scalar::forward_search_bytes(haystack, |b| b != n1 && b != n2)
        }
        [n1, n2, n3] => scalar::forward_search_bytes(haystack, |b| {
            b != n1 && b != n2 && b != n3
        }),
        _ => {
            let table = build_table(byteset);
            scalar::forward_search_bytes(haystack, |b| table[b as usize] == 0)
        }
    }
}
/// Return the index of the last byte in `haystack` that is NOT a member of
/// `byteset`, or `None` if every byte is a member (or `haystack` is empty).
///
/// With an empty `byteset`, the last byte of a non-empty haystack always
/// qualifies.
#[inline]
pub(crate) fn rfind_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
    if haystack.is_empty() {
        return None;
    }
    match *byteset {
        [] => Some(haystack.len() - 1),
        [n1] => scalar::inv_memrchr(n1, haystack),
        [n1, n2] => {
            scalar::reverse_search_bytes(haystack, |b| b != n1 && b != n2)
        }
        [n1, n2, n3] => scalar::reverse_search_bytes(haystack, |b| {
            b != n1 && b != n2 && b != n3
        }),
        _ => {
            let table = build_table(byteset);
            scalar::reverse_search_bytes(haystack, |b| table[b as usize] == 0)
        }
    }
}
#[cfg(test)]
mod tests {
// Property tests: for arbitrary haystacks and byte sets, each search
// routine must agree with the equivalent naive iterator-based search.
quickcheck! {
// `find` vs. first position whose byte is in the set.
fn qc_byteset_forward_matches_naive(
haystack: Vec<u8>,
needles: Vec<u8>
) -> bool {
super::find(&haystack, &needles)
== haystack.iter().position(|b| needles.contains(b))
}
// `rfind` vs. last position whose byte is in the set.
fn qc_byteset_backwards_matches_naive(
haystack: Vec<u8>,
needles: Vec<u8>
) -> bool {
super::rfind(&haystack, &needles)
== haystack.iter().rposition(|b| needles.contains(b))
}
// `find_not` vs. first position whose byte is not in the set.
fn qc_byteset_forward_not_matches_naive(
haystack: Vec<u8>,
needles: Vec<u8>
) -> bool {
super::find_not(&haystack, &needles)
== haystack.iter().position(|b| !needles.contains(b))
}
// `rfind_not` vs. last position whose byte is not in the set.
fn qc_byteset_backwards_not_matches_naive(
haystack: Vec<u8>,
needles: Vec<u8>
) -> bool {
super::rfind_not(&haystack, &needles)
== haystack.iter().rposition(|b| !needles.contains(b))
}
}
}

View File

@ -0,0 +1,295 @@
// This is adapted from `fallback.rs` from rust-memchr. It's modified to return
// the 'inverse' query of memchr, e.g. finding the first byte not in the provided
// set. This is simple for the 1-byte case.
use core::cmp;
use core::usize;
// Size of a machine word in bytes. It is only defined for 32-bit and 64-bit
// targets; no value is provided for any other pointer width.
#[cfg(target_pointer_width = "32")]
const USIZE_BYTES: usize = 4;
#[cfg(target_pointer_width = "64")]
const USIZE_BYTES: usize = 8;
// The number of bytes to loop at in one iteration of memchr/memrchr.
const LOOP_SIZE: usize = 2 * USIZE_BYTES;
/// Broadcast `b` into every byte of a `usize`.
///
/// For example, if `b` is `\x4E` (`01001110` in binary), the result on a
/// 32-bit system is `01001110_01001110_01001110_01001110`.
#[inline(always)]
fn repeat_byte(b: u8) -> usize {
    // Dividing the all-ones word by 0xFF leaves a word whose every byte is
    // 0x01; multiplying by `b` then copies `b` into each byte.
    let ones = usize::MAX / 255;
    ones * (b as usize)
}
/// Return the first index in `haystack` whose byte is not equal to `n1`, or
/// `None` if every byte equals `n1` (including when `haystack` is empty).
pub fn inv_memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
// `n1` repeated in every byte of a word, for whole-word comparisons.
let vn1 = repeat_byte(n1);
let confirm = |byte| byte != n1;
// Equals LOOP_SIZE only when the haystack is long enough for the unrolled
// loop below.
let loop_size = cmp::min(LOOP_SIZE, haystack.len());
let align = USIZE_BYTES - 1;
let start_ptr = haystack.as_ptr();
// One-past-the-end pointer, taken from the empty tail of the slice.
let end_ptr = haystack[haystack.len()..].as_ptr();
let mut ptr = start_ptr;
unsafe {
// Too short for a word-sized read: go byte by byte.
if haystack.len() < USIZE_BYTES {
return forward_search(start_ptr, end_ptr, ptr, confirm);
}
// XOR against the repeated needle is non-zero exactly when some byte
// of the first word differs from `n1`; locate it byte by byte.
let chunk = read_unaligned_usize(ptr);
if (chunk ^ vn1) != 0 {
return forward_search(start_ptr, end_ptr, ptr, confirm);
}
// Advance to the next word-aligned address; every byte skipped here
// was covered by the unaligned read above.
ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
debug_assert!(ptr > start_ptr);
debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
// Unrolled loop: two aligned words per iteration. It stops as soon as
// either word contains a byte other than `n1`.
while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
let a = *(ptr as *const usize);
let b = *(ptr.add(USIZE_BYTES) as *const usize);
let eqa = (a ^ vn1) != 0;
let eqb = (b ^ vn1) != 0;
if eqa || eqb {
break;
}
ptr = ptr.add(LOOP_SIZE);
}
// Pinpoint the exact byte (or scan the remaining tail) one byte at a
// time.
forward_search(start_ptr, end_ptr, ptr, confirm)
}
}
/// Return the last index in `haystack` whose byte differs from `n1`, or
/// `None` if every byte equals `n1` (which includes the empty haystack).
pub fn inv_memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
// `n1` copied into every byte of a word: XOR-ing a word with it is non-zero
// exactly when at least one byte of that word differs from `n1`.
let vn1 = repeat_byte(n1);
let confirm = |byte| byte != n1;
let loop_size = cmp::min(LOOP_SIZE, haystack.len());
let align = USIZE_BYTES - 1;
let start_ptr = haystack.as_ptr();
// One-past-the-end pointer, obtained from the empty tail slice.
let end_ptr = haystack[haystack.len()..].as_ptr();
let mut ptr = end_ptr;
unsafe {
// Fewer bytes than one word: fall back to a plain byte-by-byte scan.
if haystack.len() < USIZE_BYTES {
return reverse_search(start_ptr, end_ptr, ptr, confirm);
}
// Check the last word with an unaligned read. If it holds a byte other
// than `n1`, the byte-wise scan from the end pins down its index.
let chunk = read_unaligned_usize(ptr.sub(USIZE_BYTES));
if (chunk ^ vn1) != 0 {
return reverse_search(start_ptr, end_ptr, ptr, confirm);
}
// Round the end address down to a word boundary. Any bytes between that
// boundary and the end lie inside the word verified above.
ptr = (end_ptr as usize & !align) as *const u8;
debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
// Walk backwards two aligned words at a time, stopping at the first pair
// that contains a byte other than `n1`. The loop is skipped entirely when
// the haystack is shorter than `LOOP_SIZE`.
while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize);
let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize);
// Despite the `eq` names, these flags are true when the word is NOT made
// up entirely of `n1`.
let eqa = (a ^ vn1) != 0;
let eqb = (b ^ vn1) != 0;
if eqa || eqb {
break;
}
ptr = ptr.sub(loop_size);
}
// Finish byte by byte: this locates the exact index after a `break` and
// covers any head too short for another two-word step.
reverse_search(start_ptr, end_ptr, ptr, confirm)
}
}
/// Scan forward one byte at a time from `ptr` up to (but excluding)
/// `end_ptr`, returning the offset from `start_ptr` of the first byte for
/// which `confirm` returns true, or `None` if there is no such byte.
///
/// # Safety
///
/// Every address in `ptr..end_ptr` is dereferenced, so the caller must
/// guarantee that range is readable. Debug builds additionally assert
/// `start_ptr <= ptr <= end_ptr`.
#[inline(always)]
unsafe fn forward_search<F: Fn(u8) -> bool>(
    start_ptr: *const u8,
    end_ptr: *const u8,
    mut ptr: *const u8,
    confirm: F,
) -> Option<usize> {
    debug_assert!(start_ptr <= ptr);
    debug_assert!(ptr <= end_ptr);
    loop {
        if ptr >= end_ptr {
            return None;
        }
        if confirm(*ptr) {
            return Some(sub(ptr, start_ptr));
        }
        ptr = ptr.add(1);
    }
}
/// Scan backward one byte at a time from just below `ptr` down to
/// `start_ptr` (inclusive), returning the offset from `start_ptr` of the
/// first byte encountered for which `confirm` returns true, or `None` if
/// there is no such byte.
///
/// # Safety
///
/// Every address in `start_ptr..ptr` is dereferenced, so the caller must
/// guarantee that range is readable. Debug builds additionally assert
/// `start_ptr <= ptr <= end_ptr`.
#[inline(always)]
unsafe fn reverse_search<F: Fn(u8) -> bool>(
    start_ptr: *const u8,
    end_ptr: *const u8,
    mut ptr: *const u8,
    confirm: F,
) -> Option<usize> {
    debug_assert!(start_ptr <= ptr);
    debug_assert!(ptr <= end_ptr);
    loop {
        if ptr <= start_ptr {
            return None;
        }
        // Step back first: `ptr` always points one past the byte to examine.
        ptr = ptr.sub(1);
        if confirm(*ptr) {
            return Some(sub(ptr, start_ptr));
        }
    }
}
/// Read one `usize` worth of bytes starting at `ptr`, which does not need to
/// be word-aligned.
///
/// # Safety
///
/// The caller must guarantee that a full `usize` is readable at `ptr`.
unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
    let word_ptr = ptr as *const usize;
    word_ptr.read_unaligned()
}
/// Compute the distance in bytes from `b` up to `a`.
///
/// `a` must not be below `b`; this is checked with a debug assertion only.
fn sub(a: *const u8, b: *const u8) -> usize {
    let (hi, lo) = (a as usize, b as usize);
    debug_assert!(hi >= lo);
    hi - lo
}
/// Return the index of the first byte in `s` for which `confirm` returns
/// true, or `None` if no byte qualifies (including when `s` is empty).
///
/// This is a plain front-to-back scan written with safe iterator code, so
/// no raw pointers or `unsafe` are needed.
#[inline]
pub(crate) fn forward_search_bytes<F: Fn(u8) -> bool>(
    s: &[u8],
    confirm: F,
) -> Option<usize> {
    s.iter().position(|&byte| confirm(byte))
}
/// Return the index of the last byte in `s` for which `confirm` returns
/// true, or `None` if no byte qualifies (including when `s` is empty).
///
/// This is a plain back-to-front scan written with safe iterator code, so
/// no raw pointers or `unsafe` are needed.
#[inline]
pub(crate) fn reverse_search_bytes<F: Fn(u8) -> bool>(
    s: &[u8],
    confirm: F,
) -> Option<usize> {
    s.iter().rposition(|&byte| confirm(byte))
}
#[cfg(test)]
mod tests {
use super::{inv_memchr, inv_memrchr};
// Each entry is: search string, search byte, expected inv_memchr result,
// expected inv_memrchr result.
// These are expanded into a much larger set of tests in build_tests.
const TESTS: &[(&[u8], u8, usize, usize)] = &[
(b"z", b'a', 0, 0),
(b"zz", b'a', 0, 1),
(b"aza", b'a', 1, 1),
(b"zaz", b'a', 0, 2),
(b"zza", b'a', 0, 1),
(b"zaa", b'a', 0, 0),
(b"zzz", b'a', 0, 2),
];
// A haystack, the byte to search against, and the expected
// (forward, reverse) indices, or None when no index should be found.
type TestCase = (Vec<u8>, u8, Option<(usize, usize)>);
// Expand TESTS by padding each haystack with runs of the search byte (which
// never changes which bytes are "not the search byte"), then add haystacks
// that consist solely of the search byte and must therefore yield None.
fn build_tests() -> Vec<TestCase> {
let mut result = vec![];
for &(search, byte, fwd_pos, rev_pos) in TESTS {
result.push((search.to_vec(), byte, Some((fwd_pos, rev_pos))));
for i in 1..515 {
// add a bunch of copies of the search byte to the end.
let mut suffixed: Vec<u8> = search.into();
suffixed.extend(std::iter::repeat(byte).take(i));
result.push((suffixed, byte, Some((fwd_pos, rev_pos))));
// add a bunch of copies of the search byte to the start.
let mut prefixed: Vec<u8> =
std::iter::repeat(byte).take(i).collect();
prefixed.extend(search);
result.push((
prefixed,
byte,
Some((fwd_pos + i, rev_pos + i)),
));
// add a bunch of copies of the search byte to both ends.
let mut surrounded: Vec<u8> =
std::iter::repeat(byte).take(i).collect();
surrounded.extend(search);
surrounded.extend(std::iter::repeat(byte).take(i));
result.push((
surrounded,
byte,
Some((fwd_pos + i, rev_pos + i)),
));
}
}
// build non-matching tests for several sizes
for i in 0..515 {
result.push((
std::iter::repeat(b'\0').take(i).collect(),
b'\0',
None,
));
}
result
}
// Run every generated case through both search directions, then repeat on
// sub-slices starting at increasing offsets to vary the start alignment.
#[test]
fn test_inv_memchr() {
use {ByteSlice, B};
for (search, byte, matching) in build_tests() {
assert_eq!(
inv_memchr(byte, &search),
matching.map(|m| m.0),
"inv_memchr when searching for {:?} in {:?}",
byte as char,
// better printing
B(&search).as_bstr(),
);
assert_eq!(
inv_memrchr(byte, &search),
matching.map(|m| m.1),
"inv_memrchr when searching for {:?} in {:?}",
byte as char,
// better printing
B(&search).as_bstr(),
);
// Test a rather large number of offsets for potential alignment issues
for offset in 1..130 {
if offset >= search.len() {
break;
}
// If this would cause us to shift the results off the end, skip
// it so that we don't have to recompute them.
if let Some((f, r)) = matching {
if offset > f || offset > r {
break;
}
}
let realigned = &search[offset..];
// Dropping `offset` leading bytes shifts both expected indices down by
// the same amount.
let forward_pos = matching.map(|m| m.0 - offset);
let reverse_pos = matching.map(|m| m.1 - offset);
assert_eq!(
inv_memchr(byte, &realigned),
forward_pos,
"inv_memchr when searching (realigned by {}) for {:?} in {:?}",
offset,
byte as char,
realigned.as_bstr(),
);
assert_eq!(
inv_memrchr(byte, &realigned),
reverse_pos,
"inv_memrchr when searching (realigned by {}) for {:?} in {:?}",
offset,
byte as char,
realigned.as_bstr(),
);
}
}
}
}

View File

@ -0,0 +1,84 @@
use core::ops;
#[cfg(feature = "std")]
use std::borrow::Cow;
/// A specialized copy-on-write byte string.
///
/// This type lets callers hold a "borrowed or owned byte string" while
/// staying compatible with both std and no-std builds. In no-std mode it
/// degenerates to a plain `&[u8]`, because no owned variant is available.
#[derive(Clone, Debug)]
pub struct CowBytes<'a>(Imp<'a>);

// std build: the payload is either borrowed or owned.
#[cfg(feature = "std")]
#[derive(Clone, Debug)]
struct Imp<'a>(Cow<'a, [u8]>);

// no-std build: the payload can only be borrowed.
#[cfg(not(feature = "std"))]
#[derive(Clone, Debug)]
struct Imp<'a>(&'a [u8]);

impl<'a> ops::Deref for CowBytes<'a> {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        self.0.as_slice()
    }
}

impl<'a> CowBytes<'a> {
    /// Create a new borrowed CowBytes from anything viewable as bytes.
    pub fn new<B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> CowBytes<'a> {
        let borrowed: &'a [u8] = bytes.as_ref();
        CowBytes(Imp::new(borrowed))
    }

    /// Create a new owned CowBytes that takes over the given vector.
    #[cfg(feature = "std")]
    pub fn new_owned(bytes: Vec<u8>) -> CowBytes<'static> {
        CowBytes(Imp(Cow::Owned(bytes)))
    }

    /// Return the bytes as a borrowed slice, whether they are stored as an
    /// owned or a borrowed byte string internally.
    pub fn as_slice(&self) -> &[u8] {
        let CowBytes(ref imp) = *self;
        imp.as_slice()
    }

    /// Return an owned version of this copy-on-write byte string.
    ///
    /// An already-owned byte string is moved through unchanged; a borrowed
    /// one is copied into a fresh vector.
    #[cfg(feature = "std")]
    pub fn into_owned(self) -> CowBytes<'static> {
        let CowBytes(Imp(cow)) = self;
        CowBytes::new_owned(cow.into_owned())
    }
}

impl<'a> Imp<'a> {
    /// Wrap a borrowed slice (std build).
    #[cfg(feature = "std")]
    pub fn new(bytes: &'a [u8]) -> Imp<'a> {
        Imp(Cow::Borrowed(bytes))
    }

    /// Wrap a borrowed slice (no-std build).
    #[cfg(not(feature = "std"))]
    pub fn new(bytes: &'a [u8]) -> Imp<'a> {
        Imp(bytes)
    }

    /// View the payload as a slice (std build).
    #[cfg(feature = "std")]
    pub fn as_slice(&self) -> &[u8] {
        &*self.0
    }

    /// View the payload as a slice (no-std build).
    #[cfg(not(feature = "std"))]
    pub fn as_slice(&self) -> &[u8] {
        self.0
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,984 @@
/// Generate `PartialEq` impls in both directions between `$lhs` and `$rhs`
/// by comparing their bytes.
///
/// `$lhs` must provide `as_bytes()` and `$rhs` must be viewable as `&[u8]`
/// through `as_ref()`.
macro_rules! impl_partial_eq {
    ($lhs:ty, $rhs:ty) => {
        impl<'a, 'b> PartialEq<$rhs> for $lhs {
            #[inline]
            fn eq(&self, other: &$rhs) -> bool {
                let rhs_bytes: &[u8] = other.as_ref();
                self.as_bytes() == rhs_bytes
            }
        }

        impl<'a, 'b> PartialEq<$lhs> for $rhs {
            #[inline]
            fn eq(&self, other: &$lhs) -> bool {
                let self_bytes: &[u8] = self.as_ref();
                self_bytes == other.as_bytes()
            }
        }
    };
}
/// Generate `PartialEq` impls in both directions between `$lhs` and a
/// `Cow`-like `$rhs` by comparing their bytes.
///
/// In both impls the operand that is not the `as_bytes()` receiver is
/// dereferenced twice before `as_ref()` turns it into `&[u8]`.
#[cfg(feature = "std")]
macro_rules! impl_partial_eq_cow {
    ($lhs:ty, $rhs:ty) => {
        impl<'a, 'b> PartialEq<$rhs> for $lhs {
            #[inline]
            fn eq(&self, other: &$rhs) -> bool {
                let rhs_bytes: &[u8] = (&**other).as_ref();
                PartialEq::eq(self.as_bytes(), rhs_bytes)
            }
        }

        impl<'a, 'b> PartialEq<$lhs> for $rhs {
            #[inline]
            fn eq(&self, other: &$lhs) -> bool {
                // Here `other` is the `$lhs` operand and `self` is `$rhs`.
                let lhs_bytes: &[u8] = (&**other).as_ref();
                PartialEq::eq(lhs_bytes, self.as_bytes())
            }
        }
    };
}
/// Generate `PartialOrd` impls in both directions between `$lhs` and `$rhs`
/// by comparing their bytes.
///
/// `$lhs` must provide `as_bytes()`, `$rhs` must be viewable as `&[u8]`
/// through `as_ref()`, and `Ordering` must be in scope at the call site.
macro_rules! impl_partial_ord {
    ($lhs:ty, $rhs:ty) => {
        impl<'a, 'b> PartialOrd<$rhs> for $lhs {
            #[inline]
            fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
                let rhs_bytes: &[u8] = other.as_ref();
                PartialOrd::partial_cmp(self.as_bytes(), rhs_bytes)
            }
        }

        impl<'a, 'b> PartialOrd<$lhs> for $rhs {
            #[inline]
            fn partial_cmp(&self, other: &$lhs) -> Option<Ordering> {
                let self_bytes: &[u8] = self.as_ref();
                PartialOrd::partial_cmp(self_bytes, other.as_bytes())
            }
        }
    };
}
#[cfg(feature = "std")]
mod bstring {
use std::borrow::{Borrow, Cow, ToOwned};
use std::cmp::Ordering;
use std::fmt;
use std::iter::FromIterator;
use std::ops;
use bstr::BStr;
use bstring::BString;
use ext_vec::ByteVec;
impl fmt::Display for BString {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self.as_bstr(), f)
}
}
impl fmt::Debug for BString {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self.as_bstr(), f)
}
}
impl ops::Deref for BString {
type Target = Vec<u8>;
#[inline]
fn deref(&self) -> &Vec<u8> {
&self.bytes
}
}
impl ops::DerefMut for BString {
#[inline]
fn deref_mut(&mut self) -> &mut Vec<u8> {
&mut self.bytes
}
}
impl AsRef<[u8]> for BString {
#[inline]
fn as_ref(&self) -> &[u8] {
&self.bytes
}
}
impl AsRef<BStr> for BString {
#[inline]
fn as_ref(&self) -> &BStr {
self.as_bstr()
}
}
impl AsMut<[u8]> for BString {
#[inline]
fn as_mut(&mut self) -> &mut [u8] {
&mut self.bytes
}
}
impl AsMut<BStr> for BString {
#[inline]
fn as_mut(&mut self) -> &mut BStr {
self.as_mut_bstr()
}
}
impl Borrow<BStr> for BString {
#[inline]
fn borrow(&self) -> &BStr {
self.as_bstr()
}
}
impl ToOwned for BStr {
type Owned = BString;
#[inline]
fn to_owned(&self) -> BString {
BString::from(self)
}
}
impl Default for BString {
fn default() -> BString {
BString::from(vec![])
}
}
/// Copy a byte slice into a new `BString`.
impl<'a> From<&'a [u8]> for BString {
    #[inline]
    fn from(s: &'a [u8]) -> BString {
        BString { bytes: s.to_vec() }
    }
}

/// Wrap a byte vector without copying it.
impl From<Vec<u8>> for BString {
    #[inline]
    fn from(s: Vec<u8>) -> BString {
        BString { bytes: s }
    }
}

/// Unwrap a `BString` into its byte vector without copying it.
impl From<BString> for Vec<u8> {
    #[inline]
    fn from(s: BString) -> Vec<u8> {
        s.bytes
    }
}

/// Copy the bytes of a string slice into a new `BString`.
impl<'a> From<&'a str> for BString {
    #[inline]
    fn from(s: &'a str) -> BString {
        BString::from(s.as_bytes())
    }
}

/// Take over the buffer of a `String`.
impl From<String> for BString {
    #[inline]
    fn from(s: String) -> BString {
        let bytes = s.into_bytes();
        BString::from(bytes)
    }
}

/// Copy the bytes of a `BStr` into a new `BString`.
impl<'a> From<&'a BStr> for BString {
    #[inline]
    fn from(s: &'a BStr) -> BString {
        BString::from(&s.bytes)
    }
}

/// Wrap a `BString` as the owned variant of a `Cow`.
impl<'a> From<BString> for Cow<'a, BStr> {
    #[inline]
    fn from(s: BString) -> Cow<'a, BStr> {
        Cow::Owned(s)
    }
}
impl FromIterator<char> for BString {
#[inline]
fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> BString {
BString::from(iter.into_iter().collect::<String>())
}
}
impl FromIterator<u8> for BString {
#[inline]
fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> BString {
BString::from(iter.into_iter().collect::<Vec<u8>>())
}
}
impl<'a> FromIterator<&'a str> for BString {
#[inline]
fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> BString {
let mut buf = vec![];
for b in iter {
buf.push_str(b);
}
BString::from(buf)
}
}
impl<'a> FromIterator<&'a [u8]> for BString {
#[inline]
fn from_iter<T: IntoIterator<Item = &'a [u8]>>(iter: T) -> BString {
let mut buf = vec![];
for b in iter {
buf.push_str(b);
}
BString::from(buf)
}
}
impl<'a> FromIterator<&'a BStr> for BString {
#[inline]
fn from_iter<T: IntoIterator<Item = &'a BStr>>(iter: T) -> BString {
let mut buf = vec![];
for b in iter {
buf.push_str(b);
}
BString::from(buf)
}
}
impl FromIterator<BString> for BString {
#[inline]
fn from_iter<T: IntoIterator<Item = BString>>(iter: T) -> BString {
let mut buf = vec![];
for b in iter {
buf.push_str(b);
}
BString::from(buf)
}
}
impl Eq for BString {}

/// Two `BString`s are equal when their bytes are equal.
impl PartialEq for BString {
    #[inline]
    fn eq(&self, other: &BString) -> bool {
        self.bytes == other.bytes
    }
}

// Byte-wise equality against the common byte and string types.
impl_partial_eq!(BString, Vec<u8>);
impl_partial_eq!(BString, [u8]);
impl_partial_eq!(BString, &'a [u8]);
impl_partial_eq!(BString, String);
impl_partial_eq!(BString, str);
impl_partial_eq!(BString, &'a str);
impl_partial_eq!(BString, BStr);
impl_partial_eq!(BString, &'a BStr);

/// `BString`s are ordered by their underlying byte vectors.
impl PartialOrd for BString {
    #[inline]
    fn partial_cmp(&self, other: &BString) -> Option<Ordering> {
        PartialOrd::partial_cmp(&self.bytes, &other.bytes)
    }
}

/// Total order matching `partial_cmp`: compare the underlying byte vectors.
impl Ord for BString {
    #[inline]
    fn cmp(&self, other: &BString) -> Ordering {
        Ord::cmp(&self.bytes, &other.bytes)
    }
}

// Byte-wise ordering against the common byte and string types.
impl_partial_ord!(BString, Vec<u8>);
impl_partial_ord!(BString, [u8]);
impl_partial_ord!(BString, &'a [u8]);
impl_partial_ord!(BString, String);
impl_partial_ord!(BString, str);
impl_partial_ord!(BString, &'a str);
impl_partial_ord!(BString, BStr);
impl_partial_ord!(BString, &'a BStr);
}
mod bstr {
#[cfg(feature = "std")]
use std::borrow::Cow;
use core::cmp::Ordering;
use core::fmt;
use core::ops;
use bstr::BStr;
use ext_slice::ByteSlice;
/// Lossy display: valid UTF-8 is written as-is and each invalid chunk is
/// replaced by U+FFFD. Fill and width are honoured only when the format
/// spec carries an explicit alignment.
impl fmt::Display for BStr {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        /// Write the given bstr (lossily) to the given formatter.
        fn write_bstr(
            f: &mut fmt::Formatter,
            bstr: &BStr,
        ) -> Result<(), fmt::Error> {
            for chunk in bstr.utf8_chunks() {
                f.write_str(chunk.valid())?;
                if !chunk.invalid().is_empty() {
                    f.write_str("\u{FFFD}")?;
                }
            }
            Ok(())
        }

        /// Write 'num' fill characters to the given formatter.
        fn write_pads(f: &mut fmt::Formatter, num: usize) -> fmt::Result {
            let fill = f.fill();
            for _ in 0..num {
                f.write_fmt(format_args!("{}", fill))?;
            }
            Ok(())
        }

        // Without an explicit alignment no padding is applied at all.
        let align = match f.align() {
            Some(align) => align,
            None => return write_bstr(f, self),
        };

        let width = f.width().unwrap_or(0);
        let nchars = self.chars().count();
        let remaining_pads = width.saturating_sub(nchars);

        // Split the padding into what goes before and after the text. When
        // centering an odd amount, the extra pad goes on the right.
        let (before, after) = match align {
            fmt::Alignment::Left => (0, remaining_pads),
            fmt::Alignment::Right => (remaining_pads, 0),
            fmt::Alignment::Center => {
                let half = remaining_pads / 2;
                (half, remaining_pads - half)
            }
        };
        write_pads(f, before)?;
        write_bstr(f, self)?;
        write_pads(f, after)
    }
}
/// Debug output is a double-quoted string in which:
///
/// * NUL is written as `\0`;
/// * bytes that are not valid UTF-8 are written as upper-case `\xNN`;
/// * a genuine U+FFFD in the input is written via `char::escape_debug`;
/// * the remaining ASCII control characters other than `\n`, `\r` and `\t`
///   are written as lower-case `\xnn`;
/// * everything else is written via `char::escape_debug`.
impl fmt::Debug for BStr {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "\"")?;
        for (s, e, ch) in self.char_indices() {
            match ch {
                '\0' => write!(f, "\\0")?,
                '\u{FFFD}' => {
                    // U+FFFD is either really present in the input, or it
                    // stands in for bytes that could not be decoded. Only
                    // the exact UTF-8 encoding of U+FFFD counts as the
                    // former; otherwise show the raw bytes.
                    let bytes = self[s..e].as_bytes();
                    if bytes == b"\xEF\xBF\xBD" {
                        write!(f, "{}", ch.escape_debug())?;
                    } else {
                        for &b in bytes {
                            write!(f, r"\x{:02X}", b)?;
                        }
                    }
                }
                // ASCII control characters except \0, \n, \r, \t. The upper
                // range ends at 0x1F so that 0x1A..=0x1F are escaped the
                // same way as the other control characters.
                '\x01'..='\x08'
                | '\x0b'
                | '\x0c'
                | '\x0e'..='\x1f'
                | '\x7f' => {
                    write!(f, "\\x{:02x}", ch as u32)?;
                }
                '\n' | '\r' | '\t' | _ => {
                    write!(f, "{}", ch.escape_debug())?;
                }
            }
        }
        write!(f, "\"")?;
        Ok(())
    }
}
/// A `BStr` dereferences to its underlying bytes.
impl ops::Deref for BStr {
    type Target = [u8];

    #[inline]
    fn deref(&self) -> &[u8] {
        &self.bytes
    }
}

/// Mutable access to the underlying bytes.
impl ops::DerefMut for BStr {
    #[inline]
    fn deref_mut(&mut self) -> &mut [u8] {
        &mut self.bytes
    }
}

/// Indexing by position yields a single byte.
impl ops::Index<usize> for BStr {
    type Output = u8;

    #[inline]
    fn index(&self, idx: usize) -> &u8 {
        let bytes = self.as_bytes();
        &bytes[idx]
    }
}

/// `bstr[..]` is the whole string.
impl ops::Index<ops::RangeFull> for BStr {
    type Output = BStr;

    #[inline]
    fn index(&self, _: ops::RangeFull) -> &BStr {
        self
    }
}

/// `bstr[a..b]` is a sub-`BStr`.
impl ops::Index<ops::Range<usize>> for BStr {
    type Output = BStr;

    #[inline]
    fn index(&self, r: ops::Range<usize>) -> &BStr {
        BStr::new(&self.as_bytes()[r])
    }
}

/// `bstr[a..=b]` is a sub-`BStr`.
impl ops::Index<ops::RangeInclusive<usize>> for BStr {
    type Output = BStr;

    #[inline]
    fn index(&self, r: ops::RangeInclusive<usize>) -> &BStr {
        let (first, last) = (*r.start(), *r.end());
        BStr::new(&self.as_bytes()[first..=last])
    }
}

/// `bstr[a..]` is a sub-`BStr`.
impl ops::Index<ops::RangeFrom<usize>> for BStr {
    type Output = BStr;

    #[inline]
    fn index(&self, r: ops::RangeFrom<usize>) -> &BStr {
        BStr::new(&self.as_bytes()[r])
    }
}

/// `bstr[..b]` is a sub-`BStr`.
impl ops::Index<ops::RangeTo<usize>> for BStr {
    type Output = BStr;

    #[inline]
    fn index(&self, r: ops::RangeTo<usize>) -> &BStr {
        BStr::new(&self.as_bytes()[r])
    }
}

/// `bstr[..=b]` is a sub-`BStr`.
impl ops::Index<ops::RangeToInclusive<usize>> for BStr {
    type Output = BStr;

    #[inline]
    fn index(&self, r: ops::RangeToInclusive<usize>) -> &BStr {
        BStr::new(&self.as_bytes()[r])
    }
}
/// Mutable access to a single byte by position.
impl ops::IndexMut<usize> for BStr {
    #[inline]
    fn index_mut(&mut self, idx: usize) -> &mut u8 {
        let bytes = &mut self.bytes;
        &mut bytes[idx]
    }
}

/// `bstr[..]` is the whole string, mutably.
impl ops::IndexMut<ops::RangeFull> for BStr {
    #[inline]
    fn index_mut(&mut self, _: ops::RangeFull) -> &mut BStr {
        self
    }
}

/// `bstr[a..b]` is a mutable sub-`BStr`.
impl ops::IndexMut<ops::Range<usize>> for BStr {
    #[inline]
    fn index_mut(&mut self, r: ops::Range<usize>) -> &mut BStr {
        BStr::from_bytes_mut(&mut self.bytes[r])
    }
}

/// `bstr[a..=b]` is a mutable sub-`BStr`.
impl ops::IndexMut<ops::RangeInclusive<usize>> for BStr {
    #[inline]
    fn index_mut(&mut self, r: ops::RangeInclusive<usize>) -> &mut BStr {
        let (first, last) = (*r.start(), *r.end());
        BStr::from_bytes_mut(&mut self.bytes[first..=last])
    }
}

/// `bstr[a..]` is a mutable sub-`BStr`.
impl ops::IndexMut<ops::RangeFrom<usize>> for BStr {
    #[inline]
    fn index_mut(&mut self, r: ops::RangeFrom<usize>) -> &mut BStr {
        BStr::from_bytes_mut(&mut self.bytes[r])
    }
}

/// `bstr[..b]` is a mutable sub-`BStr`.
impl ops::IndexMut<ops::RangeTo<usize>> for BStr {
    #[inline]
    fn index_mut(&mut self, r: ops::RangeTo<usize>) -> &mut BStr {
        BStr::from_bytes_mut(&mut self.bytes[r])
    }
}

/// `bstr[..=b]` is a mutable sub-`BStr`.
impl ops::IndexMut<ops::RangeToInclusive<usize>> for BStr {
    #[inline]
    fn index_mut(&mut self, r: ops::RangeToInclusive<usize>) -> &mut BStr {
        BStr::from_bytes_mut(&mut self.bytes[r])
    }
}
/// Borrow a `BStr` as a plain byte slice.
impl AsRef<[u8]> for BStr {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.as_bytes()
    }
}

/// View a byte slice as a `BStr`.
impl AsRef<BStr> for [u8] {
    #[inline]
    fn as_ref(&self) -> &BStr {
        BStr::new(self)
    }
}

/// View a string slice as a `BStr`.
impl AsRef<BStr> for str {
    #[inline]
    fn as_ref(&self) -> &BStr {
        BStr::new(self)
    }
}

/// Mutably borrow a `BStr` as a plain byte slice.
impl AsMut<[u8]> for BStr {
    #[inline]
    fn as_mut(&mut self) -> &mut [u8] {
        let bytes: &mut [u8] = &mut self.bytes;
        bytes
    }
}

/// View a mutable byte slice as a mutable `BStr`.
impl AsMut<BStr> for [u8] {
    #[inline]
    fn as_mut(&mut self) -> &mut BStr {
        BStr::new_mut(self)
    }
}

/// The default shared `BStr` reference is built from the empty byte string.
impl<'a> Default for &'a BStr {
    fn default() -> &'a BStr {
        let empty: &'a [u8] = b"";
        BStr::from_bytes(empty)
    }
}

/// The default mutable `BStr` reference is built from an empty byte slice.
impl<'a> Default for &'a mut BStr {
    fn default() -> &'a mut BStr {
        BStr::from_bytes_mut(&mut [])
    }
}
/// Reinterpret a byte slice as a `BStr` without copying.
impl<'a> From<&'a [u8]> for &'a BStr {
    #[inline]
    fn from(s: &'a [u8]) -> &'a BStr {
        BStr::from_bytes(s)
    }
}

/// Reinterpret the bytes of a string slice as a `BStr` without copying.
impl<'a> From<&'a str> for &'a BStr {
    #[inline]
    fn from(s: &'a str) -> &'a BStr {
        let bytes = s.as_bytes();
        BStr::from_bytes(bytes)
    }
}

/// Wrap a `BStr` reference as the borrowed variant of a `Cow`.
#[cfg(feature = "std")]
impl<'a> From<&'a BStr> for Cow<'a, BStr> {
    #[inline]
    fn from(s: &'a BStr) -> Cow<'a, BStr> {
        Cow::Borrowed(s)
    }
}

/// Convert boxed bytes into a boxed `BStr`.
#[cfg(feature = "std")]
impl From<Box<[u8]>> for Box<BStr> {
    #[inline]
    fn from(s: Box<[u8]>) -> Box<BStr> {
        BStr::from_boxed_bytes(s)
    }
}

/// Convert a boxed `BStr` back into boxed bytes.
#[cfg(feature = "std")]
impl From<Box<BStr>> for Box<[u8]> {
    #[inline]
    fn from(s: Box<BStr>) -> Box<[u8]> {
        BStr::into_boxed_bytes(s)
    }
}
impl Eq for BStr {}

/// Two `BStr`s are equal when their bytes are equal.
impl PartialEq<BStr> for BStr {
    #[inline]
    fn eq(&self, other: &BStr) -> bool {
        PartialEq::eq(self.as_bytes(), other.as_bytes())
    }
}

// Byte-wise equality against byte and string slices.
impl_partial_eq!(BStr, [u8]);
impl_partial_eq!(BStr, &'a [u8]);
impl_partial_eq!(BStr, str);
impl_partial_eq!(BStr, &'a str);

// Byte-wise equality against owned and copy-on-write types (std only).
#[cfg(feature = "std")]
impl_partial_eq!(BStr, Vec<u8>);
#[cfg(feature = "std")]
impl_partial_eq!(&'a BStr, Vec<u8>);
#[cfg(feature = "std")]
impl_partial_eq!(BStr, String);
#[cfg(feature = "std")]
impl_partial_eq!(&'a BStr, String);
#[cfg(feature = "std")]
impl_partial_eq_cow!(&'a BStr, Cow<'a, BStr>);
#[cfg(feature = "std")]
impl_partial_eq_cow!(&'a BStr, Cow<'a, str>);
#[cfg(feature = "std")]
impl_partial_eq_cow!(&'a BStr, Cow<'a, [u8]>);

/// `BStr`s are ordered by their underlying bytes.
impl PartialOrd for BStr {
    #[inline]
    fn partial_cmp(&self, other: &BStr) -> Option<Ordering> {
        let (lhs, rhs) = (self.as_bytes(), other.as_bytes());
        PartialOrd::partial_cmp(lhs, rhs)
    }
}

/// Total order matching `partial_cmp`: compare the underlying bytes.
impl Ord for BStr {
    #[inline]
    fn cmp(&self, other: &BStr) -> Ordering {
        Ord::cmp(self.as_bytes(), other.as_bytes())
    }
}

// Byte-wise ordering against byte and string slices.
impl_partial_ord!(BStr, [u8]);
impl_partial_ord!(BStr, &'a [u8]);
impl_partial_ord!(BStr, str);
impl_partial_ord!(BStr, &'a str);

// Byte-wise ordering against owned types (std only).
#[cfg(feature = "std")]
impl_partial_ord!(BStr, Vec<u8>);
#[cfg(feature = "std")]
impl_partial_ord!(&'a BStr, Vec<u8>);
#[cfg(feature = "std")]
impl_partial_ord!(BStr, String);
#[cfg(feature = "std")]
impl_partial_ord!(&'a BStr, String);
}
// Serde support for the borrowed `BStr` type.
#[cfg(feature = "serde1-nostd")]
mod bstr_serde {
    use core::fmt;

    use serde::{
        de::Error, de::Visitor, Deserialize, Deserializer, Serialize,
        Serializer,
    };

    use bstr::BStr;

    /// A `BStr` is serialized as a byte sequence.
    impl Serialize for BStr {
        #[inline]
        fn serialize<S: Serializer>(
            &self,
            serializer: S,
        ) -> Result<S::Ok, S::Error> {
            let raw = self.as_bytes();
            serializer.serialize_bytes(raw)
        }
    }

    /// A `&BStr` can be deserialized only by borrowing from the input,
    /// from either borrowed bytes or a borrowed string.
    impl<'a, 'de: 'a> Deserialize<'de> for &'a BStr {
        #[inline]
        fn deserialize<D: Deserializer<'de>>(
            deserializer: D,
        ) -> Result<&'a BStr, D::Error> {
            struct BStrVisitor;

            impl<'de> Visitor<'de> for BStrVisitor {
                type Value = &'de BStr;

                fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
                    f.write_str("a borrowed byte string")
                }

                #[inline]
                fn visit_borrowed_bytes<E: Error>(
                    self,
                    value: &'de [u8],
                ) -> Result<&'de BStr, E> {
                    let borrowed = BStr::new(value);
                    Ok(borrowed)
                }

                #[inline]
                fn visit_borrowed_str<E: Error>(
                    self,
                    value: &'de str,
                ) -> Result<&'de BStr, E> {
                    let borrowed = BStr::new(value);
                    Ok(borrowed)
                }
            }

            deserializer.deserialize_bytes(BStrVisitor)
        }
    }
}
// Serde support for the owned `BString` type.
#[cfg(feature = "serde1")]
mod bstring_serde {
    use std::cmp;
    use std::fmt;

    use serde::{
        de::Error, de::SeqAccess, de::Visitor, Deserialize, Deserializer,
        Serialize, Serializer,
    };

    use bstring::BString;

    /// A `BString` is serialized as a byte sequence.
    impl Serialize for BString {
        #[inline]
        fn serialize<S: Serializer>(
            &self,
            serializer: S,
        ) -> Result<S::Ok, S::Error> {
            let raw = self.as_bytes();
            serializer.serialize_bytes(raw)
        }
    }

    /// A `BString` can be deserialized from a sequence of elements, from
    /// borrowed or owned bytes, or from a borrowed or owned string.
    impl<'de> Deserialize<'de> for BString {
        #[inline]
        fn deserialize<D: Deserializer<'de>>(
            deserializer: D,
        ) -> Result<BString, D::Error> {
            struct BStringVisitor;

            impl<'de> Visitor<'de> for BStringVisitor {
                type Value = BString;

                fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
                    f.write_str("a byte string")
                }

                #[inline]
                fn visit_seq<V: SeqAccess<'de>>(
                    self,
                    mut visitor: V,
                ) -> Result<BString, V::Error> {
                    // The size hint comes from the input, so the up-front
                    // reservation is capped at 256 elements.
                    let hinted = visitor.size_hint().unwrap_or(0);
                    let mut bytes = Vec::with_capacity(cmp::min(hinted, 256));
                    while let Some(v) = visitor.next_element()? {
                        bytes.push(v);
                    }
                    Ok(BString::from(bytes))
                }

                #[inline]
                fn visit_bytes<E: Error>(
                    self,
                    value: &[u8],
                ) -> Result<BString, E> {
                    let owned = BString::from(value);
                    Ok(owned)
                }

                #[inline]
                fn visit_byte_buf<E: Error>(
                    self,
                    value: Vec<u8>,
                ) -> Result<BString, E> {
                    let owned = BString::from(value);
                    Ok(owned)
                }

                #[inline]
                fn visit_str<E: Error>(
                    self,
                    value: &str,
                ) -> Result<BString, E> {
                    let owned = BString::from(value);
                    Ok(owned)
                }

                #[inline]
                fn visit_string<E: Error>(
                    self,
                    value: String,
                ) -> Result<BString, E> {
                    let owned = BString::from(value);
                    Ok(owned)
                }
            }

            deserializer.deserialize_byte_buf(BStringVisitor)
        }
    }
}
// Tests for the `Display` impl of `BStr`.
//
// The input b"\xf0\x28\x8c\xbc" renders lossily as four characters:
// U+FFFD, '(', U+FFFD, U+FFFD. The expected strings spell the replacement
// character as the escape `\u{FFFD}` so that they survive any re-encoding
// of this file, and the padding runs are written out in full.
#[cfg(test)]
mod display {
    use crate::ByteSlice;
    use bstring::BString;

    #[test]
    fn clean() {
        assert_eq!(&format!("{}", &b"abc".as_bstr()), "abc");
        assert_eq!(
            &format!("{}", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "\u{FFFD}(\u{FFFD}\u{FFFD}"
        );
    }

    #[test]
    fn width_bigger_than_bstr() {
        // "abc" is 3 characters: width 7 leaves 4 pads, width 6 leaves 3.
        // When centering an odd number of pads the extra one goes right.
        assert_eq!(&format!("{:<7}!", &b"abc".as_bstr()), "abc    !");
        assert_eq!(&format!("{:>7}!", &b"abc".as_bstr()), "    abc!");
        assert_eq!(&format!("{:^7}!", &b"abc".as_bstr()), "  abc  !");
        assert_eq!(&format!("{:^6}!", &b"abc".as_bstr()), " abc  !");
        assert_eq!(&format!("{:-<7}!", &b"abc".as_bstr()), "abc----!");
        assert_eq!(&format!("{:->7}!", &b"abc".as_bstr()), "----abc!");
        assert_eq!(&format!("{:-^7}!", &b"abc".as_bstr()), "--abc--!");
        assert_eq!(&format!("{:-^6}!", &b"abc".as_bstr()), "-abc--!");

        // The lossy rendering is 4 characters: width 7 leaves 3 pads,
        // width 6 leaves 2.
        assert_eq!(
            &format!("{:<7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "\u{FFFD}(\u{FFFD}\u{FFFD}   !"
        );
        assert_eq!(
            &format!("{:>7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "   \u{FFFD}(\u{FFFD}\u{FFFD}!"
        );
        assert_eq!(
            &format!("{:^7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            " \u{FFFD}(\u{FFFD}\u{FFFD}  !"
        );
        assert_eq!(
            &format!("{:^6}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            " \u{FFFD}(\u{FFFD}\u{FFFD} !"
        );
        assert_eq!(
            &format!("{:-<7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "\u{FFFD}(\u{FFFD}\u{FFFD}---!"
        );
        assert_eq!(
            &format!("{:->7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "---\u{FFFD}(\u{FFFD}\u{FFFD}!"
        );
        assert_eq!(
            &format!("{:-^7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "-\u{FFFD}(\u{FFFD}\u{FFFD}--!"
        );
        assert_eq!(
            &format!("{:-^6}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "-\u{FFFD}(\u{FFFD}\u{FFFD}-!"
        );
    }

    #[test]
    fn width_lesser_than_bstr() {
        // A width smaller than the text never truncates and never pads.
        assert_eq!(&format!("{:<2}!", &b"abc".as_bstr()), "abc!");
        assert_eq!(&format!("{:>2}!", &b"abc".as_bstr()), "abc!");
        assert_eq!(&format!("{:^2}!", &b"abc".as_bstr()), "abc!");
        assert_eq!(&format!("{:-<2}!", &b"abc".as_bstr()), "abc!");
        assert_eq!(&format!("{:->2}!", &b"abc".as_bstr()), "abc!");
        assert_eq!(&format!("{:-^2}!", &b"abc".as_bstr()), "abc!");

        assert_eq!(
            &format!("{:<3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "\u{FFFD}(\u{FFFD}\u{FFFD}!"
        );
        assert_eq!(
            &format!("{:>3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "\u{FFFD}(\u{FFFD}\u{FFFD}!"
        );
        assert_eq!(
            &format!("{:^3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "\u{FFFD}(\u{FFFD}\u{FFFD}!"
        );
        assert_eq!(
            &format!("{:^2}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "\u{FFFD}(\u{FFFD}\u{FFFD}!"
        );
        assert_eq!(
            &format!("{:-<3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "\u{FFFD}(\u{FFFD}\u{FFFD}!"
        );
        assert_eq!(
            &format!("{:->3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "\u{FFFD}(\u{FFFD}\u{FFFD}!"
        );
        assert_eq!(
            &format!("{:-^3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "\u{FFFD}(\u{FFFD}\u{FFFD}!"
        );
        assert_eq!(
            &format!("{:-^2}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
            "\u{FFFD}(\u{FFFD}\u{FFFD}!"
        );
    }

    quickcheck! {
        // Formatting with a width equal to the character count must never
        // produce fewer characters than the string itself has.
        fn total_length(bstr: BString) -> bool {
            let size = bstr.chars().count();
            format!("{:<1$}", bstr.as_bstr(), size).chars().count() >= size
        }
    }
}
// quickcheck support: a `BString` is generated and shrunk through its
// underlying byte vector.
#[cfg(test)]
mod bstring_arbitrary {
    use bstring::BString;

    use quickcheck::{Arbitrary, Gen};

    impl Arbitrary for BString {
        fn arbitrary(g: &mut Gen) -> BString {
            let bytes = Vec::<u8>::arbitrary(g);
            BString::from(bytes)
        }

        fn shrink(&self) -> Box<dyn Iterator<Item = BString>> {
            let smaller = self.bytes.shrink().map(BString::from);
            Box::new(smaller)
        }
    }
}
#[test]
fn test_debug() {
    use crate::{ByteSlice, B};

    // NUL bytes print as `\0` and other control bytes as `\xNN`.
    let input = b"\0\0\0 ftypisom\0\0\x02\0isomiso2avc1mp";
    assert_eq!(
        r#""\0\0\0 ftypisom\0\0\x02\0isomiso2avc1mp""#,
        format!("{:?}", input.as_bstr()),
    );

    // Tests that if the underlying bytes contain the UTF-8 encoding of the
    // replacement codepoint, then we emit the codepoint just like other
    // non-printable Unicode characters.
    //
    // Before fixing #72, the output here would be:
    //   \\xFF\\xEF\\xBF\\xBD\\xFF
    let rendered = format!("{:?}", b"\xFF\xEF\xBF\xBD\xFF".as_bstr());
    assert_eq!(
        b"\"\\xFF\xEF\xBF\xBD\\xFF\"".as_bstr(),
        B(&rendered).as_bstr(),
    );
}
// Regression test for comparing a `Cow` against its borrowed form.
// See: https://github.com/BurntSushi/bstr/issues/82
#[test]
fn test_cows_regression() {
    use crate::ByteSlice;
    use std::borrow::Cow;

    // `Cow<BStr>` compared against `&BStr`.
    let cow_bstr = Cow::from(b"hello bstr".as_bstr());
    let plain_bstr = b"goodbye bstr".as_bstr();
    assert_ne!(cow_bstr, plain_bstr);

    // `Cow<str>` compared against `&str`.
    let cow_str = Cow::from("hello str");
    let plain_str = "goodbye str";
    assert_ne!(cow_str, plain_str);
}

View File

@ -0,0 +1,514 @@
/*!
Utilities for working with I/O using byte strings.
This module currently only exports a single trait, `BufReadExt`, which provides
facilities for conveniently and efficiently working with lines as byte strings.
More APIs may be added in the future.
*/
use std::io;
use ext_slice::ByteSlice;
use ext_vec::ByteVec;
/// An extension trait for
/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html)
/// which provides convenience APIs for dealing with byte strings.
pub trait BufReadExt: io::BufRead {
/// Returns an iterator over the lines of this reader, where each line
/// is represented as a byte string.
///
/// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
/// an error is yielded if there was a problem reading from the underlying
/// reader.
///
/// On success, the next line in the iterator is returned. The line does
/// *not* contain a trailing `\n` or `\r\n`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::io;
///
/// use bstr::io::BufReadExt;
///
/// # fn example() -> Result<(), io::Error> {
/// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
///
/// let mut lines = vec![];
/// for result in cursor.byte_lines() {
/// let line = result?;
/// lines.push(line);
/// }
/// assert_eq!(lines.len(), 3);
/// assert_eq!(lines[0], "lorem".as_bytes());
/// assert_eq!(lines[1], "ipsum".as_bytes());
/// assert_eq!(lines[2], "dolor".as_bytes());
/// # Ok(()) }; example().unwrap()
/// ```
fn byte_lines(self) -> ByteLines<Self>
where
Self: Sized,
{
ByteLines { buf: self }
}
/// Returns an iterator over byte-terminated records of this reader, where
/// each record is represented as a byte string.
///
/// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
/// an error is yielded if there was a problem reading from the underlying
/// reader.
///
/// On success, the next record in the iterator is returned. The record
/// does *not* contain its trailing terminator.
///
/// Note that calling `byte_records(b'\n')` differs from `byte_lines()` in
/// that it has no special handling for `\r`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::io;
///
/// use bstr::io::BufReadExt;
///
/// # fn example() -> Result<(), io::Error> {
/// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
///
/// let mut records = vec![];
/// for result in cursor.byte_records(b'\x00') {
/// let record = result?;
/// records.push(record);
/// }
/// assert_eq!(records.len(), 3);
/// assert_eq!(records[0], "lorem".as_bytes());
/// assert_eq!(records[1], "ipsum".as_bytes());
/// assert_eq!(records[2], "dolor".as_bytes());
/// # Ok(()) }; example().unwrap()
/// ```
fn byte_records(self, terminator: u8) -> ByteRecords<Self>
where
Self: Sized,
{
ByteRecords { terminator, buf: self }
}
/// Executes the given closure on each line in the underlying reader.
///
/// If the closure returns an error (or if the underlying reader returns an
/// error), then iteration is stopped and the error is returned. If false
/// is returned, then iteration is stopped and no error is returned.
///
/// The closure given is called on exactly the same values as yielded by
/// the [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
/// iterator. Namely, lines do _not_ contain trailing `\n` or `\r\n` bytes.
///
/// This routine is useful for iterating over lines as quickly as
/// possible. Namely, a single allocation is reused for each line.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::io;
///
/// use bstr::io::BufReadExt;
///
/// # fn example() -> Result<(), io::Error> {
/// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
///
/// let mut lines = vec![];
/// cursor.for_byte_line(|line| {
/// lines.push(line.to_vec());
/// Ok(true)
/// })?;
/// assert_eq!(lines.len(), 3);
/// assert_eq!(lines[0], "lorem".as_bytes());
/// assert_eq!(lines[1], "ipsum".as_bytes());
/// assert_eq!(lines[2], "dolor".as_bytes());
/// # Ok(()) }; example().unwrap()
/// ```
fn for_byte_line<F>(self, mut for_each_line: F) -> io::Result<()>
where
Self: Sized,
F: FnMut(&[u8]) -> io::Result<bool>,
{
self.for_byte_line_with_terminator(|line| {
for_each_line(&trim_line_slice(&line))
})
}
/// Executes the given closure on each byte-terminated record in the
/// underlying reader.
///
/// If the closure returns an error (or if the underlying reader returns an
/// error), then iteration is stopped and the error is returned. If false
/// is returned, then iteration is stopped and no error is returned.
///
/// The closure given is called on exactly the same values as yielded by
/// the [`byte_records`](trait.BufReadExt.html#method.byte_records)
/// iterator. Namely, records do _not_ contain a trailing terminator byte.
///
/// This routine is useful for iterating over records as quickly as
/// possible. Namely, a single allocation is reused for each record.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::io;
///
/// use bstr::io::BufReadExt;
///
/// # fn example() -> Result<(), io::Error> {
/// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
///
/// let mut records = vec![];
/// cursor.for_byte_record(b'\x00', |record| {
/// records.push(record.to_vec());
/// Ok(true)
/// })?;
/// assert_eq!(records.len(), 3);
/// assert_eq!(records[0], "lorem".as_bytes());
/// assert_eq!(records[1], "ipsum".as_bytes());
/// assert_eq!(records[2], "dolor".as_bytes());
/// # Ok(()) }; example().unwrap()
/// ```
fn for_byte_record<F>(
self,
terminator: u8,
mut for_each_record: F,
) -> io::Result<()>
where
Self: Sized,
F: FnMut(&[u8]) -> io::Result<bool>,
{
self.for_byte_record_with_terminator(terminator, |chunk| {
for_each_record(&trim_record_slice(&chunk, terminator))
})
}
/// Calls `for_each_line` once for every line read from the underlying
/// reader, leaving the line terminator attached.
///
/// Iteration ends early, without an error, as soon as the closure returns
/// `Ok(false)`. An error produced by the closure or by the underlying
/// reader also ends iteration, and that error is returned.
///
/// In contrast to
/// [`for_byte_line`](trait.BufReadExt.html#method.for_byte_line),
/// each slice passed to the closure still ends with its line terminator
/// whenever the input contained one.
///
/// Prefer this routine when lines must be visited as quickly as possible,
/// since a single allocation is reused for each line.
///
/// This is the same as calling `for_byte_record_with_terminator` with `\n`
/// as the terminator.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::io;
///
/// use bstr::io::BufReadExt;
///
/// # fn example() -> Result<(), io::Error> {
/// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
///
/// let mut lines = vec![];
/// cursor.for_byte_line_with_terminator(|line| {
///     lines.push(line.to_vec());
///     Ok(true)
/// })?;
/// assert_eq!(lines.len(), 3);
/// assert_eq!(lines[0], "lorem\n".as_bytes());
/// assert_eq!(lines[1], "ipsum\r\n".as_bytes());
/// assert_eq!(lines[2], "dolor".as_bytes());
/// # Ok(()) }; example().unwrap()
/// ```
fn for_byte_line_with_terminator<F>(
    self,
    for_each_line: F,
) -> io::Result<()>
where
    Self: Sized,
    F: FnMut(&[u8]) -> io::Result<bool>,
{
    // A line is just a record whose terminator is a line feed.
    const LINE_FEED: u8 = b'\n';
    self.for_byte_record_with_terminator(LINE_FEED, for_each_line)
}
/// Executes the given closure on each byte-terminated record in the
/// underlying reader.
///
/// If the closure returns an error (or if the underlying reader returns an
/// error), then iteration is stopped and the error is returned. If false
/// is returned, then iteration is stopped and no error is returned.
///
/// Unlike
/// [`for_byte_record`](trait.BufReadExt.html#method.for_byte_record),
/// the records given to the closure *do* include the record terminator, if
/// one exists.
///
/// This routine is useful for iterating over records as quickly as
/// possible. Namely, a single allocation is reused for each record.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::io;
///
/// use bstr::B;
/// use bstr::io::BufReadExt;
///
/// # fn example() -> Result<(), io::Error> {
/// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
///
/// let mut records = vec![];
/// cursor.for_byte_record_with_terminator(b'\x00', |record| {
/// records.push(record.to_vec());
/// Ok(true)
/// })?;
/// assert_eq!(records.len(), 3);
/// assert_eq!(records[0], B(b"lorem\x00"));
/// assert_eq!(records[1], B("ipsum\x00"));
/// assert_eq!(records[2], B("dolor"));
/// # Ok(()) }; example().unwrap()
/// ```
fn for_byte_record_with_terminator<F>(
mut self,
terminator: u8,
mut for_each_record: F,
) -> io::Result<()>
where
Self: Sized,
F: FnMut(&[u8]) -> io::Result<bool>,
{
// Scratch buffer for a record that is not fully contained in the slice
// returned by `fill_buf`. It is cleared and reused on every pass of the
// outer loop.
let mut bytes = vec![];
// Holds an error returned by the closure in the fast path below, so that
// the final `consume` call still runs before the error is reported.
let mut res = Ok(());
// Number of bytes taken from the current `fill_buf` slice that have not
// yet been reported back to the reader through `consume`.
let mut consumed = 0;
'outer: loop {
// Lend out complete record slices from our buffer
{
let mut buf = self.fill_buf()?;
while let Some(index) = buf.find_byte(terminator) {
// `index + 1` keeps the terminator byte as part of the record.
let (record, rest) = buf.split_at(index + 1);
buf = rest;
consumed += record.len();
match for_each_record(&record) {
// The closure asked to stop; this is not an error.
Ok(false) => break 'outer,
Err(err) => {
res = Err(err);
break 'outer;
}
// Ok(true): keep scanning the current buffer.
_ => (),
}
}
// Copy the final record fragment to our local buffer. This
// saves read_until() from re-scanning a buffer we know
// contains no remaining terminators.
bytes.extend_from_slice(&buf);
consumed += buf.len();
}
// Everything in the slice was either lent out or copied into `bytes`, so
// release it before asking the reader for more data.
self.consume(consumed);
consumed = 0;
// N.B. read_until uses a different version of memchr that may
// be slower than the memchr crate that bstr uses. However, this
// should only run for a fairly small number of records, assuming a
// decent buffer size.
self.read_until(terminator, &mut bytes)?;
// `bytes` is empty only when both the leftover fragment and `read_until`
// produced nothing, i.e. the reader has no more data. Otherwise it holds
// one record, which is passed to the closure like any other.
if bytes.is_empty() || !for_each_record(&bytes)? {
break;
}
bytes.clear();
}
// Non-zero only after a `break 'outer` from the fast path: marks the records
// already handed to the closure as consumed. After the plain `break` above
// `consumed` is still 0.
self.consume(consumed);
res
}
}
// Blanket implementation: every `io::BufRead` type gets the `BufReadExt`
// methods. The impl body is empty, so all of them come from the trait's
// provided (default) method bodies.
impl<B: io::BufRead> BufReadExt for B {}
/// An iterator over lines from an instance of
/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
///
/// Each item is one line as an owned `Vec<u8>` with its trailing `\n` or
/// `\r\n` removed. Errors from the underlying reader are yielded as `Err`
/// items.
///
/// This iterator is generally created by calling the
/// [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
/// method on the
/// [`BufReadExt`](trait.BufReadExt.html)
/// trait.
#[derive(Debug)]
pub struct ByteLines<B> {
/// The underlying buffered reader that lines are read from.
buf: B,
}
/// An iterator over records from an instance of
/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
///
/// A byte record is any sequence of bytes terminated by a particular byte
/// chosen by the caller. For example, NUL separated byte strings are said to
/// be NUL-terminated byte records.
///
/// Each item is one record as an owned `Vec<u8>` with its trailing terminator
/// byte removed. Errors from the underlying reader are yielded as `Err` items.
///
/// This iterator is generally created by calling the
/// [`byte_records`](trait.BufReadExt.html#method.byte_records)
/// method on the
/// [`BufReadExt`](trait.BufReadExt.html)
/// trait.
#[derive(Debug)]
pub struct ByteRecords<B> {
/// The underlying buffered reader that records are read from.
buf: B,
/// The byte that ends each record.
terminator: u8,
}
impl<B: io::BufRead> Iterator for ByteLines<B> {
    type Item = io::Result<Vec<u8>>;

    /// Reads the next line into a fresh buffer and yields it with its
    /// trailing `\n` or `\r\n` removed.
    ///
    /// Yields `None` once the reader reports that zero bytes were read, and
    /// `Some(Err(..))` if the reader fails.
    fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
        let mut line = vec![];
        let nread = match self.buf.read_until(b'\n', &mut line) {
            Ok(n) => n,
            Err(err) => return Some(Err(err)),
        };
        if nread == 0 {
            return None;
        }
        trim_line(&mut line);
        Some(Ok(line))
    }
}
impl<B: io::BufRead> Iterator for ByteRecords<B> {
    type Item = io::Result<Vec<u8>>;

    /// Reads the next record into a fresh buffer and yields it with its
    /// trailing terminator byte removed.
    ///
    /// Yields `None` once the reader reports that zero bytes were read, and
    /// `Some(Err(..))` if the reader fails.
    fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
        let mut record = vec![];
        let nread = match self.buf.read_until(self.terminator, &mut record) {
            Ok(n) => n,
            Err(err) => return Some(Err(err)),
        };
        if nread == 0 {
            return None;
        }
        trim_record(&mut record, self.terminator);
        Some(Ok(record))
    }
}
/// Removes one trailing line terminator (`\n` or `\r\n`) from `line` in place.
///
/// A `\r` is only removed when it directly precedes the removed `\n`; a
/// buffer that does not end in `\n` is left untouched.
fn trim_line(line: &mut Vec<u8>) {
    if line.last_byte() != Some(b'\n') {
        return;
    }
    line.pop_byte();
    let ends_with_cr = line.last_byte() == Some(b'\r');
    if ends_with_cr {
        line.pop_byte();
    }
}
/// Returns `line` without one trailing line terminator (`\n` or `\r\n`).
///
/// A `\r` is only dropped when it directly precedes the dropped `\n`; a
/// slice that does not end in `\n` is returned unchanged.
fn trim_line_slice(mut line: &[u8]) -> &[u8] {
    if line.last_byte() != Some(b'\n') {
        return line;
    }
    let without_lf = &line[..line.len() - 1];
    line = match without_lf.last_byte() {
        Some(b'\r') => &without_lf[..without_lf.len() - 1],
        _ => without_lf,
    };
    line
}
/// Removes a single trailing `terminator` byte from `record` in place, if
/// the record ends with one.
fn trim_record(record: &mut Vec<u8>, terminator: u8) {
    let ends_with_terminator = record.last_byte() == Some(terminator);
    if ends_with_terminator {
        record.pop_byte();
    }
}
/// Returns `record` without a single trailing `terminator` byte, or
/// unchanged if it does not end with one.
fn trim_record_slice(mut record: &[u8], terminator: u8) -> &[u8] {
    if record.last_byte() != Some(terminator) {
        return record;
    }
    let trimmed_len = record.len() - 1;
    record = &record[..trimmed_len];
    record
}
#[cfg(test)]
mod tests {
    use super::BufReadExt;
    use bstring::BString;

    /// Runs `for_byte_line` over `slice` and returns every line it yields.
    fn collect_lines<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
        let mut collected = vec![];
        let outcome = slice.as_ref().for_byte_line(|line| {
            collected.push(BString::from(line.to_vec()));
            Ok(true)
        });
        outcome.unwrap();
        collected
    }

    /// Runs `for_byte_line_with_terminator` over `slice` and returns every
    /// line it yields.
    fn collect_lines_term<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
        let mut collected = vec![];
        let outcome = slice.as_ref().for_byte_line_with_terminator(|line| {
            collected.push(BString::from(line.to_vec()));
            Ok(true)
        });
        outcome.unwrap();
        collected
    }

    #[test]
    fn lines_without_terminator() {
        // (input, lines expected once terminators are stripped)
        let cases: &[(&str, &[&str])] = &[
            ("", &[]),
            ("\n", &[""]),
            ("\n\n", &["", ""]),
            ("a\nb\n", &["a", "b"]),
            ("a\nb", &["a", "b"]),
            ("abc\nxyz\n", &["abc", "xyz"]),
            ("abc\nxyz", &["abc", "xyz"]),
            ("\r\n", &[""]),
            ("\r\n\r\n", &["", ""]),
            ("a\r\nb\r\n", &["a", "b"]),
            ("a\r\nb", &["a", "b"]),
            ("abc\r\nxyz\r\n", &["abc", "xyz"]),
            ("abc\r\nxyz", &["abc", "xyz"]),
            ("abc\rxyz", &["abc\rxyz"]),
        ];
        for &(input, expected) in cases {
            assert_eq!(collect_lines(input), expected.to_vec());
        }
    }

    #[test]
    fn lines_with_terminator() {
        // (input, lines expected with their terminators kept)
        let cases: &[(&str, &[&str])] = &[
            ("", &[]),
            ("\n", &["\n"]),
            ("\n\n", &["\n", "\n"]),
            ("a\nb\n", &["a\n", "b\n"]),
            ("a\nb", &["a\n", "b"]),
            ("abc\nxyz\n", &["abc\n", "xyz\n"]),
            ("abc\nxyz", &["abc\n", "xyz"]),
            ("\r\n", &["\r\n"]),
            ("\r\n\r\n", &["\r\n", "\r\n"]),
            ("a\r\nb\r\n", &["a\r\n", "b\r\n"]),
            ("a\r\nb", &["a\r\n", "b"]),
            ("abc\r\nxyz\r\n", &["abc\r\n", "xyz\r\n"]),
            ("abc\r\nxyz", &["abc\r\n", "xyz"]),
            ("abc\rxyz", &["abc\rxyz"]),
        ];
        for &(input, expected) in cases {
            assert_eq!(collect_lines_term(input), expected.to_vec());
        }
    }
}

View File

@ -0,0 +1,456 @@
/*!
An experimental byte string library.
Byte strings are just like standard Unicode strings with one very important
difference: byte strings are only *conventionally* UTF-8 while Rust's standard
Unicode strings are *guaranteed* to be valid UTF-8. The primary motivation for
byte strings is for handling arbitrary bytes that are mostly UTF-8.
# Overview
This crate provides two important traits that provide string oriented methods
on `&[u8]` and `Vec<u8>` types:
* [`ByteSlice`](trait.ByteSlice.html) extends the `[u8]` type with additional
string oriented methods.
* [`ByteVec`](trait.ByteVec.html) extends the `Vec<u8>` type with additional
string oriented methods.
Additionally, this crate provides two concrete byte string types that deref to
`[u8]` and `Vec<u8>`. These are useful for storing byte string types, and come
with convenient `std::fmt::Debug` implementations:
* [`BStr`](struct.BStr.html) is a byte string slice, analogous to `str`.
* [`BString`](struct.BString.html) is an owned growable byte string buffer,
analogous to `String`.
Additionally, the free function [`B`](fn.B.html) serves as a convenient short
hand for writing byte string literals.
# Quick examples
Byte strings build on the existing APIs for `Vec<u8>` and `&[u8]`, with
additional string oriented methods. Operations such as iterating over
graphemes, searching for substrings, replacing substrings, trimming and case
conversion are examples of things not provided on the standard library `&[u8]`
APIs but are provided by this crate. For example, this code iterates over all
occurrences of a substring:
```
use bstr::ByteSlice;
let s = b"foo bar foo foo quux foo";
let mut matches = vec![];
for start in s.find_iter("foo") {
matches.push(start);
}
assert_eq!(matches, [0, 8, 12, 21]);
```
Here's another example showing how to do a search and replace (and also showing
use of the `B` function):
```
use bstr::{B, ByteSlice};
let old = B("foo ☃☃☃ foo foo quux foo");
let new = old.replace("foo", "hello");
assert_eq!(new, B("hello ☃☃☃ hello hello quux hello"));
```
And here's an example that shows case conversion, even in the presence of
invalid UTF-8:
```
use bstr::{ByteSlice, ByteVec};
let mut lower = Vec::from("hello β");
lower[0] = b'\xFF';
// lowercase β is uppercased to Β
assert_eq!(lower.to_uppercase(), b"\xFFELLO \xCE\x92");
```
# Convenient debug representation
When working with byte strings, it is often useful to be able to print them
as if they were byte strings and not sequences of integers. While this crate
cannot affect the `std::fmt::Debug` implementations for `[u8]` and `Vec<u8>`,
this crate does provide the `BStr` and `BString` types which have convenient
`std::fmt::Debug` implementations.
For example, this
```
use bstr::ByteSlice;
let mut bytes = Vec::from("hello β");
bytes[0] = b'\xFF';
println!("{:?}", bytes.as_bstr());
```
will output `"\xFFello β"`.
This example works because the
[`ByteSlice::as_bstr`](trait.ByteSlice.html#method.as_bstr)
method converts any `&[u8]` to a `&BStr`.
# When should I use byte strings?
This library is somewhat of an experiment that reflects my hypothesis that
UTF-8 by convention is a better trade off in some circumstances than guaranteed
UTF-8. It's possible, perhaps even likely, that this is a niche concern for
folks working closely with core text primitives.
The first time this idea hit me was in the implementation of Rust's regex
engine. In particular, very little of the internal implementation cares at all
about searching valid UTF-8 encoded strings. Indeed, internally, the
implementation converts `&str` from the API to `&[u8]` fairly quickly and
just deals with raw bytes. UTF-8 match boundaries are then guaranteed by the
finite state machine itself rather than any specific string type. This makes it
possible to not only run regexes on `&str` values, but also on `&[u8]` values.
Why would you ever want to run a regex on a `&[u8]` though? Well, `&[u8]` is
the fundamental way in which one reads data from all sorts of streams, via the
standard library's [`Read`](https://doc.rust-lang.org/std/io/trait.Read.html)
trait. In particular, there is no platform independent way to determine whether
what you're reading from is some binary file or a human readable text file.
Therefore, if you're writing a program to search files, you probably need to
deal with `&[u8]` directly unless you're okay with first converting it to a
`&str` and dropping any bytes that aren't valid UTF-8. (Or otherwise determine
the encoding---which is often impractical---and perform a transcoding step.)
Often, the simplest and most robust way to approach this is to simply treat the
contents of a file as if it were mostly valid UTF-8 and pass through invalid
UTF-8 untouched. This may not be the most correct approach though!
One case in particular exacerbates these issues, and that's memory mapping
a file. When you memory map a file, that file may be gigabytes big, but all
you get is a `&[u8]`. Converting that to a `&str` all in one go is generally
not a good idea because of the costs associated with doing so, and also
because it generally causes one to do two passes over the data instead of
one, which is quite undesirable. It is of course usually possible to do it in
an incremental way by only parsing chunks at a time, but this is often complex
to do or impractical. For example, many regex engines only accept one
contiguous sequence of bytes at a time with no way to perform incremental
matching.
In summary, the conventional UTF-8 byte strings provided by this library are an
experiment. They are definitely useful in some limited circumstances, but how
useful they are more broadly isn't clear yet.
# `bstr` in public APIs
Since this library is still experimental, you should not use it in the public
API of your crates until it hits `1.0` (unless you're OK with tracking
breaking releases of `bstr`).
In general, it should be possible to avoid putting anything in this crate into
your public APIs. Namely, you should never need to use the `ByteSlice` or
`ByteVec` traits as bounds on public APIs, since their only purpose is to
extend the methods on the concrete types `[u8]` and `Vec<u8>`, respectively.
Similarly, it should not be necessary to put either the `BStr` or `BString`
types into public APIs. If you want to use them internally, then they can
be converted to/from `[u8]`/`Vec<u8>` as needed.
# Differences with standard strings
The primary difference between `[u8]` and `str` is that the former is
conventionally UTF-8 while the latter is guaranteed to be UTF-8. The phrase
"conventionally UTF-8" means that a `[u8]` may contain bytes that do not form
a valid UTF-8 sequence, but operations defined on the type in this crate are
generally most useful on valid UTF-8 sequences. For example, iterating over
Unicode codepoints or grapheme clusters is an operation that is only defined
on valid UTF-8. Therefore, when invalid UTF-8 is encountered, the Unicode
replacement codepoint is substituted. Thus, a byte string that is not UTF-8 at
all is of limited utility when using this crate.
However, not all operations on byte strings are specifically Unicode aware. For
example, substring search has no specific Unicode semantics ascribed to it. It
works just as well for byte strings that are completely valid UTF-8 as for byte
strings that contain no valid UTF-8 at all. Similarly for replacements and
various other operations that do not need any Unicode specific tailoring.
Aside from the difference in how UTF-8 is handled, the APIs between `[u8]` and
`str` (and `Vec<u8>` and `String`) are intentionally very similar, including
maintaining the same behavior for corner cases in things like substring
splitting. There are, however, some differences:
* Substring search is not done with `matches`, but instead, `find_iter`.
In general, this crate does not define any generic
[`Pattern`](https://doc.rust-lang.org/std/str/pattern/trait.Pattern.html)
infrastructure, and instead prefers adding new methods for different
argument types. For example, `matches` can search by a `char` or a `&str`,
whereas `find_iter` can only search by a byte string. `find_char` can be
used for searching by a `char`.
* Since `SliceConcatExt` in the standard library is unstable, it is not
possible to reuse that to implement `join` and `concat` methods. Instead,
[`join`](fn.join.html) and [`concat`](fn.concat.html) are provided as free
functions that perform a similar task.
* This library bundles in a few more Unicode operations, such as grapheme,
word and sentence iterators. More operations, such as normalization and
case folding, may be provided in the future.
* Some `String`/`str` APIs will panic if a particular index was not on a valid
UTF-8 code unit sequence boundary. Conversely, no such checking is performed
in this crate, as is consistent with treating byte strings as a sequence of
bytes. This means callers are responsible for maintaining a UTF-8 invariant
if that's important.
* Some routines provided by this crate, such as `starts_with_str`, have a
`_str` suffix to differentiate them from similar routines already defined
on the `[u8]` type. The difference is that `starts_with` requires its
parameter to be a `&[u8]`, whereas `starts_with_str` permits its parameter
to be anything that implements `AsRef<[u8]>`, which is more flexible. This
means you can write `bytes.starts_with_str("")` instead of
`bytes.starts_with("".as_bytes())`.
Otherwise, you should find most of the APIs between this crate and the standard
library string APIs to be very similar, if not identical.
# Handling of invalid UTF-8
Since byte strings are only *conventionally* UTF-8, there is no guarantee
that byte strings contain valid UTF-8. Indeed, it is perfectly legal for a
byte string to contain arbitrary bytes. However, since this library defines
a *string* type, it provides many operations specified by Unicode. These
operations are typically only defined over codepoints, and thus have no real
meaning on bytes that are invalid UTF-8 because they do not map to a particular
codepoint.
For this reason, whenever operations defined only on codepoints are used, this
library will automatically convert invalid UTF-8 to the Unicode replacement
codepoint, `U+FFFD`, which looks like this: `<EFBFBD>`. For example, an
[iterator over codepoints](struct.Chars.html) will yield a Unicode
replacement codepoint whenever it comes across bytes that are not valid UTF-8:
```
use bstr::ByteSlice;
let bs = b"a\xFF\xFFz";
let chars: Vec<char> = bs.chars().collect();
assert_eq!(vec!['a', '\u{FFFD}', '\u{FFFD}', 'z'], chars);
```
There are a few ways in which invalid bytes can be substituted with a Unicode
replacement codepoint. One way, not used by this crate, is to replace every
individual invalid byte with a single replacement codepoint. In contrast, the
approach this crate uses is called the "substitution of maximal subparts," as
specified by the Unicode Standard (Chapter 3, Section 9). (This approach is
also used by [W3C's Encoding Standard](https://www.w3.org/TR/encoding/).) In
this strategy, a replacement codepoint is inserted whenever a byte is found
that cannot possibly lead to a valid UTF-8 code unit sequence. If there were
previous bytes that represented a *prefix* of a well-formed UTF-8 code unit
sequence, then all of those bytes (up to 3) are substituted with a single
replacement codepoint. For example:
```
use bstr::ByteSlice;
let bs = b"a\xF0\x9F\x87z";
let chars: Vec<char> = bs.chars().collect();
// The bytes \xF0\x9F\x87 could lead to a valid UTF-8 sequence, but 3 of them
// on their own are invalid. Only one replacement codepoint is substituted,
// which demonstrates the "substitution of maximal subparts" strategy.
assert_eq!(vec!['a', '\u{FFFD}', 'z'], chars);
```
If you do need to access the raw bytes for some reason in an iterator like
`Chars`, then you should use the iterator's "indices" variant, which gives
the byte offsets containing the invalid UTF-8 bytes that were substituted with
the replacement codepoint. For example:
```
use bstr::{B, ByteSlice};
let bs = b"a\xE2\x98z";
let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
// Even though the replacement codepoint is encoded as 3 bytes itself, the
// byte range given here is only two bytes, corresponding to the original
// raw bytes.
assert_eq!(vec![(0, 1, 'a'), (1, 3, '\u{FFFD}'), (3, 4, 'z')], chars);
// Thus, getting the original raw bytes is as simple as slicing the original
// byte string:
let chars: Vec<&[u8]> = bs.char_indices().map(|(s, e, _)| &bs[s..e]).collect();
assert_eq!(vec![B("a"), B(b"\xE2\x98"), B("z")], chars);
```
# File paths and OS strings
One of the premier features of Rust's standard library is how it handles file
paths. In particular, it makes it very hard to write incorrect code while
simultaneously providing a correct cross platform abstraction for manipulating
file paths. The key challenge that one faces with file paths across platforms
is derived from the following observations:
* On most Unix-like systems, file paths are an arbitrary sequence of bytes.
* On Windows, file paths are an arbitrary sequence of 16-bit integers.
(In both cases, certain sequences aren't allowed. For example a `NUL` byte is
not allowed in either case. But we can ignore this for the purposes of this
section.)
Byte strings, like the ones provided in this crate, line up really well with
file paths on Unix like systems, which are themselves just arbitrary sequences
of bytes. It turns out that if you treat them as "mostly UTF-8," then things
work out pretty well. On the contrary, byte strings _don't_ really work
that well on Windows because it's not possible to correctly roundtrip file
paths between 16-bit integers and something that looks like UTF-8 _without_
explicitly defining an encoding to do this for you, which is anathema to byte
strings, which are just bytes.
Rust's standard library elegantly solves this problem by specifying an
internal encoding for file paths that's only used on Windows called
[WTF-8](https://simonsapin.github.io/wtf-8/). Its key properties are that they
permit losslessly roundtripping file paths on Windows by extending UTF-8 to
support an encoding of surrogate codepoints, while simultaneously supporting
zero-cost conversion from Rust's Unicode strings to file paths. (Since UTF-8 is
a proper subset of WTF-8.)
The fundamental point at which the above strategy fails is when you want to
treat file paths as things that look like strings in a zero cost way. In most
cases, this is actually the wrong thing to do, but some cases call for it,
for example, glob or regex matching on file paths. This is because WTF-8 is
treated as an internal implementation detail, and there is no way to access
those bytes via a public API. Therefore, such consumers are limited in what
they can do:
1. One could re-implement WTF-8 and re-encode file paths on Windows to WTF-8
by accessing their underlying 16-bit integer representation. Unfortunately,
this isn't zero cost (it introduces a second WTF-8 decoding step) and it's
not clear this is a good thing to do, since WTF-8 should ideally remain an
internal implementation detail.
2. One could instead declare that they will not handle paths on Windows that
are not valid UTF-16, and return an error when one is encountered.
3. Like (2), but instead of returning an error, lossily decode the file path
on Windows that isn't valid UTF-16 into UTF-16 by replacing invalid bytes
with the Unicode replacement codepoint.
While this library may provide facilities for (1) in the future, currently,
this library only provides facilities for (2) and (3). In particular, a suite
of conversion functions are provided that permit converting between byte
strings, OS strings and file paths. For owned byte strings, they are:
* [`ByteVec::from_os_string`](trait.ByteVec.html#method.from_os_string)
* [`ByteVec::from_os_str_lossy`](trait.ByteVec.html#method.from_os_str_lossy)
* [`ByteVec::from_path_buf`](trait.ByteVec.html#method.from_path_buf)
* [`ByteVec::from_path_lossy`](trait.ByteVec.html#method.from_path_lossy)
* [`ByteVec::into_os_string`](trait.ByteVec.html#method.into_os_string)
* [`ByteVec::into_os_string_lossy`](trait.ByteVec.html#method.into_os_string_lossy)
* [`ByteVec::into_path_buf`](trait.ByteVec.html#method.into_path_buf)
* [`ByteVec::into_path_buf_lossy`](trait.ByteVec.html#method.into_path_buf_lossy)
For byte string slices, they are:
* [`ByteSlice::from_os_str`](trait.ByteSlice.html#method.from_os_str)
* [`ByteSlice::from_path`](trait.ByteSlice.html#method.from_path)
* [`ByteSlice::to_os_str`](trait.ByteSlice.html#method.to_os_str)
* [`ByteSlice::to_os_str_lossy`](trait.ByteSlice.html#method.to_os_str_lossy)
* [`ByteSlice::to_path`](trait.ByteSlice.html#method.to_path)
* [`ByteSlice::to_path_lossy`](trait.ByteSlice.html#method.to_path_lossy)
On Unix, all of these conversions are rigorously zero cost, which gives one
a way to ergonomically deal with raw file paths exactly as they are using
normal string-related functions. On Windows, these conversion routines perform
a UTF-8 check and either return an error or lossily decode the file path
into valid UTF-8, depending on which function you use. This means that you
cannot roundtrip all file paths on Windows correctly using these conversion
routines. However, this may be an acceptable downside since such file paths
are exceptionally rare. Moreover, roundtripping isn't always necessary, for
example, if all you're doing is filtering based on file paths.
The reason why using byte strings for this is potentially superior to the
standard library's approach is that a lot of Rust code is already lossily
converting file paths to Rust's Unicode strings, which are required to be valid
UTF-8, and thus contain latent bugs on Unix where paths with invalid UTF-8 are
not terribly uncommon. If you instead use byte strings, then you're guaranteed
to write correct code for Unix, at the cost of getting a corner case wrong on
Windows.
*/
#![cfg_attr(not(feature = "std"), no_std)]
#![allow(dead_code)]
#[cfg(feature = "std")]
extern crate core;
#[cfg(feature = "unicode")]
#[macro_use]
extern crate lazy_static;
extern crate memchr;
#[cfg(test)]
#[macro_use]
extern crate quickcheck;
#[cfg(feature = "unicode")]
extern crate regex_automata;
#[cfg(feature = "serde1-nostd")]
extern crate serde;
#[cfg(test)]
extern crate ucd_parse;
pub use bstr::BStr;
#[cfg(feature = "std")]
pub use bstring::BString;
pub use ext_slice::{
ByteSlice, Bytes, Fields, FieldsWith, Find, FindReverse, Finder,
FinderReverse, Lines, LinesWithTerminator, Split, SplitN, SplitNReverse,
SplitReverse, B,
};
#[cfg(feature = "std")]
pub use ext_vec::{concat, join, ByteVec, DrainBytes, FromUtf8Error};
#[cfg(feature = "unicode")]
pub use unicode::{
GraphemeIndices, Graphemes, SentenceIndices, Sentences, WordIndices,
Words, WordsWithBreakIndices, WordsWithBreaks,
};
pub use utf8::{
decode as decode_utf8, decode_last as decode_last_utf8, CharIndices,
Chars, Utf8Chunk, Utf8Chunks, Utf8Error,
};
mod ascii;
mod bstr;
#[cfg(feature = "std")]
mod bstring;
mod byteset;
mod cow;
mod ext_slice;
#[cfg(feature = "std")]
mod ext_vec;
mod impls;
#[cfg(feature = "std")]
pub mod io;
mod search;
#[cfg(test)]
mod tests;
#[cfg(feature = "unicode")]
mod unicode;
mod utf8;
#[cfg(test)]
mod apitests {
    use bstr::BStr;
    use bstring::BString;
    use ext_slice::{Finder, FinderReverse};

    /// Compile-time check that the listed public types are `Send`, `Sync`,
    /// `UnwindSafe` and `RefUnwindSafe`. The test body does nothing at
    /// runtime; it fails to build if any of those bounds stops holding.
    #[test]
    fn oibits() {
        use std::panic::{RefUnwindSafe, UnwindSafe};

        // Instantiating this with `T` requires all four auto traits at once.
        fn assert_auto_traits<T>()
        where
            T: Send + Sync + RefUnwindSafe + UnwindSafe,
        {
        }

        assert_auto_traits::<&BStr>();
        assert_auto_traits::<BString>();
        assert_auto_traits::<Finder>();
        assert_auto_traits::<FinderReverse>();
    }
}

View File

@ -0,0 +1,258 @@
/// A 256-entry table holding one `u8` weight per byte value; the entry at
/// index `b` belongs to byte `b`, as labelled by the trailing comment on each
/// row.
///
/// NOTE(review): the values look like a heuristic rank of how common each
/// byte is in typical text, with larger meaning more common (for example
/// `' '` is 255, `'e'` is 253 and most control bytes are below 60). Every
/// byte from `0xC0` through `0xFF` is assigned 255. Confirm the exact meaning
/// against the prefilter code that consumes this table.
pub const BYTE_FREQUENCIES: [u8; 256] = [
55, // '\x00'
52, // '\x01'
51, // '\x02'
50, // '\x03'
49, // '\x04'
48, // '\x05'
47, // '\x06'
46, // '\x07'
45, // '\x08'
103, // '\t'
242, // '\n'
66, // '\x0b'
67, // '\x0c'
229, // '\r'
44, // '\x0e'
43, // '\x0f'
42, // '\x10'
41, // '\x11'
40, // '\x12'
39, // '\x13'
38, // '\x14'
37, // '\x15'
36, // '\x16'
35, // '\x17'
34, // '\x18'
33, // '\x19'
56, // '\x1a'
32, // '\x1b'
31, // '\x1c'
30, // '\x1d'
29, // '\x1e'
28, // '\x1f'
255, // ' '
148, // '!'
164, // '"'
149, // '#'
136, // '$'
160, // '%'
155, // '&'
173, // "'"
221, // '('
222, // ')'
134, // '*'
122, // '+'
232, // ','
202, // '-'
215, // '.'
224, // '/'
208, // '0'
220, // '1'
204, // '2'
187, // '3'
183, // '4'
179, // '5'
177, // '6'
168, // '7'
178, // '8'
200, // '9'
226, // ':'
195, // ';'
154, // '<'
184, // '='
174, // '>'
126, // '?'
120, // '@'
191, // 'A'
157, // 'B'
194, // 'C'
170, // 'D'
189, // 'E'
162, // 'F'
161, // 'G'
150, // 'H'
193, // 'I'
142, // 'J'
137, // 'K'
171, // 'L'
176, // 'M'
185, // 'N'
167, // 'O'
186, // 'P'
112, // 'Q'
175, // 'R'
192, // 'S'
188, // 'T'
156, // 'U'
140, // 'V'
143, // 'W'
123, // 'X'
133, // 'Y'
128, // 'Z'
147, // '['
138, // '\\'
146, // ']'
114, // '^'
223, // '_'
151, // '`'
249, // 'a'
216, // 'b'
238, // 'c'
236, // 'd'
253, // 'e'
227, // 'f'
218, // 'g'
230, // 'h'
247, // 'i'
135, // 'j'
180, // 'k'
241, // 'l'
233, // 'm'
246, // 'n'
244, // 'o'
231, // 'p'
139, // 'q'
245, // 'r'
243, // 's'
251, // 't'
235, // 'u'
201, // 'v'
196, // 'w'
240, // 'x'
214, // 'y'
152, // 'z'
182, // '{'
205, // '|'
181, // '}'
127, // '~'
27, // '\x7f'
212, // '\x80'
211, // '\x81'
210, // '\x82'
213, // '\x83'
228, // '\x84'
197, // '\x85'
169, // '\x86'
159, // '\x87'
131, // '\x88'
172, // '\x89'
105, // '\x8a'
80, // '\x8b'
98, // '\x8c'
96, // '\x8d'
97, // '\x8e'
81, // '\x8f'
207, // '\x90'
145, // '\x91'
116, // '\x92'
115, // '\x93'
144, // '\x94'
130, // '\x95'
153, // '\x96'
121, // '\x97'
107, // '\x98'
132, // '\x99'
109, // '\x9a'
110, // '\x9b'
124, // '\x9c'
111, // '\x9d'
82, // '\x9e'
108, // '\x9f'
118, // '\xa0'
141, // '¡'
113, // '¢'
129, // '£'
119, // '¤'
125, // '¥'
165, // '¦'
117, // '§'
92, // '¨'
106, // '©'
83, // 'ª'
72, // '«'
99, // '¬'
93, // '\xad'
65, // '®'
79, // '¯'
166, // '°'
237, // '±'
163, // '²'
199, // '³'
190, // '´'
225, // 'µ'
209, // '¶'
203, // '·'
198, // '¸'
217, // '¹'
219, // 'º'
206, // '»'
234, // '¼'
248, // '½'
158, // '¾'
239, // '¿'
255, // 'À'
255, // 'Á'
255, // 'Â'
255, // 'Ã'
255, // 'Ä'
255, // 'Å'
255, // 'Æ'
255, // 'Ç'
255, // 'È'
255, // 'É'
255, // 'Ê'
255, // 'Ë'
255, // 'Ì'
255, // 'Í'
255, // 'Î'
255, // 'Ï'
255, // 'Ð'
255, // 'Ñ'
255, // 'Ò'
255, // 'Ó'
255, // 'Ô'
255, // 'Õ'
255, // 'Ö'
255, // '×'
255, // 'Ø'
255, // 'Ù'
255, // 'Ú'
255, // 'Û'
255, // 'Ü'
255, // 'Ý'
255, // 'Þ'
255, // 'ß'
255, // 'à'
255, // 'á'
255, // 'â'
255, // 'ã'
255, // 'ä'
255, // 'å'
255, // 'æ'
255, // 'ç'
255, // 'è'
255, // 'é'
255, // 'ê'
255, // 'ë'
255, // 'ì'
255, // 'í'
255, // 'î'
255, // 'ï'
255, // 'ð'
255, // 'ñ'
255, // 'ò'
255, // 'ó'
255, // 'ô'
255, // 'õ'
255, // 'ö'
255, // '÷'
255, // 'ø'
255, // 'ù'
255, // 'ú'
255, // 'û'
255, // 'ü'
255, // 'ý'
255, // 'þ'
255, // 'ÿ'
];

View File

@ -0,0 +1,8 @@
pub use self::prefilter::PrefilterState;
pub use self::twoway::TwoWay;
mod byte_frequencies;
mod prefilter;
#[cfg(test)]
mod tests;
mod twoway;

View File

@ -0,0 +1,424 @@
use core::mem;
use ext_slice::ByteSlice;
use search::byte_frequencies::BYTE_FREQUENCIES;
/// PrefilterState tracks state associated with the effectiveness of a
/// prefilter. It is used to track how many bytes, on average, are skipped by
/// the prefilter. If this average dips below a certain threshold over time,
/// then the state renders the prefilter inert and stops using it.
///
/// A prefilter state should be created for each search. (Where creating an
/// iterator via, e.g., `find_iter`, is treated as a single search.)
#[derive(Clone, Debug)]
pub struct PrefilterState {
/// The number of skips that have been executed. Incremented by one on
/// every call to `update`.
skips: usize,
/// The total number of bytes that have been skipped. Grows by the amount
/// passed to each `update` call.
skipped: usize,
/// The maximum length of a match. This is used to help determine how many
/// bytes on average should be skipped in order for a prefilter to be
/// effective. A state created with a maximum length of 0 starts out inert.
max_match_len: usize,
/// Once this heuristic has been deemed ineffective, it will be inert
/// throughout the rest of its lifetime. This serves as a cheap way to
/// check inertness.
inert: bool,
}
impl PrefilterState {
/// The minimum number of skip attempts to try before considering whether
/// a prefilter is effective or not.
const MIN_SKIPS: usize = 50;
/// The minimum amount of bytes that skipping must average.
///
/// This value was chosen based on varying it and checking the bstr/find/
/// microbenchmarks. In particular, this can impact the
/// pathological/repeated-{huge,small} benchmarks quite a bit if it's
/// set too low.
const MIN_SKIP_BYTES: usize = 8;
/// Create a fresh prefilter state.
pub fn new(max_match_len: usize) -> PrefilterState {
if max_match_len == 0 {
return PrefilterState::inert();
}
PrefilterState { skips: 0, skipped: 0, max_match_len, inert: false }
}
/// Create a fresh prefilter state that is always inert.
fn inert() -> PrefilterState {
PrefilterState { skips: 0, skipped: 0, max_match_len: 0, inert: true }
}
/// Update this state with the number of bytes skipped on the last
/// invocation of the prefilter.
#[inline]
pub fn update(&mut self, skipped: usize) {
self.skips += 1;
self.skipped += skipped;
}
/// Return true if and only if this state indicates that a prefilter is
/// still effective.
#[inline]
pub fn is_effective(&mut self) -> bool {
if self.inert {
return false;
}
if self.skips < PrefilterState::MIN_SKIPS {
return true;
}
if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips {
return true;
}
// We're inert.
self.inert = true;
false
}
}
/// A heuristic frequency based prefilter for searching a single needle.
///
/// This prefilter attempts to pick out the byte in a needle that is predicted
/// to occur least frequently, and search for that using fast vectorized
/// routines. If a rare enough byte could not be found (or the needle is
/// empty), then this prefilter's constructors return an inert prefilter
/// instead, which must not be used for searching.
///
/// This can be combined with `PrefilterState` to dynamically render this
/// prefilter inert if it proves to be ineffective.
///
/// The two offsets stored here are relative to the start of the needle when
/// built with `Freqy::forward`, and count bytes from the end of the needle
/// (so `0` is the last byte) when built with `Freqy::reverse`.
#[derive(Clone, Debug)]
pub struct Freqy {
/// Whether this prefilter is inert. An inert prefilter must not be used to
/// search for candidates.
inert: bool,
/// The length of the needle we're searching for.
needle_len: usize,
/// The rarest byte in the needle, according to pre-computed frequency
/// analysis.
rare1: u8,
/// The offset of the rarest byte in the needle (see the type-level note on
/// which end of the needle offsets are measured from).
rare1i: usize,
/// The second rarest byte in the needle, according to pre-computed
/// frequency analysis. (This may be equivalent to the rarest byte.)
///
/// The second rarest byte is used as a type of guard for quickly detecting
/// a mismatch after memchr locates an instance of the rarest byte. This
/// is a hedge against pathological cases where the pre-computed frequency
/// analysis may be off. (But of course, does not prevent *all*
/// pathological cases.)
rare2: u8,
/// The offset of the second rarest byte in the needle, measured the same
/// way as `rare1i`.
rare2i: usize,
}
impl Freqy {
/// The maximum frequency rank permitted. If the rarest byte in the needle
/// has a frequency rank above this value, then Freqy is not used.
const MAX_RANK: usize = 200;
/// Build a new `PrefilterState` suited to this prefilter.
///
/// The state measures how well the prefilter speeds up a search, so it
/// should normally be carried across consecutive searches of the same
/// haystack (as an iterator does) and replaced when the haystack changes.
///
/// An inert prefilter always hands out an inert state; otherwise the
/// state is created from the needle length.
pub fn prefilter_state(&self) -> PrefilterState {
    match self.inert {
        true => PrefilterState::inert(),
        false => PrefilterState::new(self.needle_len),
    }
}
/// Build a well-formed but inert prefilter, usable as the result of either
/// the forward or the reverse constructor.
///
/// It is never correct to search with an inert prefilter: what it reports
/// as the next (or previous) candidate is unspecified.
fn inert() -> Freqy {
    // Everything except the `inert` flag is a placeholder.
    let (rare1, rare1i) = (0, 0);
    let (rare2, rare2i) = (0, 0);
    Freqy { inert: true, needle_len: 0, rare1, rare1i, rare2, rare2i }
}
/// Build the prefilter for searching forwards for `needle`.
///
/// Picks the two bytes of the needle with the lowest frequency rank,
/// together with their offsets from the start of the needle. The result is
/// inert when the needle is empty or when even its rarest byte ranks above
/// `MAX_RANK`.
pub fn forward(needle: &[u8]) -> Freqy {
    // Seed the two candidates from the first one or two bytes.
    let (first, second) = match needle.len() {
        0 => return Freqy::inert(),
        1 => ((needle[0], 0), (needle[0], 0)),
        _ => ((needle[0], 0), (needle[1], 1)),
    };
    // Order the seeds so that `rare1` never ranks above `rare2`.
    let ((mut rare1, mut rare1i), (mut rare2, mut rare2i)) =
        if Freqy::rank(second.0) < Freqy::rank(first.0) {
            (second, first)
        } else {
            (first, second)
        };
    // Scan the rest. The two bytes are kept distinct where possible, but
    // that is not required.
    for (i, &b) in needle.iter().enumerate().skip(2) {
        let rank_b = Freqy::rank(b);
        if rank_b < Freqy::rank(rare1) {
            rare2 = rare1;
            rare2i = rare1i;
            rare1 = b;
            rare1i = i;
        } else if b != rare1 && rank_b < Freqy::rank(rare2) {
            rare2 = b;
            rare2i = i;
        }
    }
    if Freqy::rank(rare1) > Freqy::MAX_RANK {
        Freqy::inert()
    } else {
        Freqy {
            inert: false,
            needle_len: needle.len(),
            rare1,
            rare1i,
            rare2,
            rare2i,
        }
    }
}
/// Build the prefilter for searching in reverse for `needle`.
///
/// Picks the two bytes of the needle with the lowest frequency rank. In
/// this direction every offset counts bytes from the END of the needle, so
/// `0` denotes the last byte. The result is inert when the needle is empty
/// or when even its rarest byte ranks above `MAX_RANK`.
pub fn reverse(needle: &[u8]) -> Freqy {
    if needle.is_empty() {
        return Freqy::inert();
    }
    // Seed the two candidates from the last one or two bytes.
    let last = needle.len() - 1;
    let second_off = if needle.len() >= 2 { 1 } else { 0 };
    let first = (needle[last], 0);
    let second = (needle[last - second_off], second_off);
    // Order the seeds so that `rare1` never ranks above `rare2`.
    let ((mut rare1, mut rare1i), (mut rare2, mut rare2i)) =
        if Freqy::rank(second.0) < Freqy::rank(first.0) {
            (second, first)
        } else {
            (first, second)
        };
    // Walk the rest from the back. The two bytes are kept distinct where
    // possible, but that is not required.
    for (i, &b) in needle.iter().rev().enumerate().skip(2) {
        let rank_b = Freqy::rank(b);
        if rank_b < Freqy::rank(rare1) {
            rare2 = rare1;
            rare2i = rare1i;
            rare1 = b;
            rare1i = i;
        } else if b != rare1 && rank_b < Freqy::rank(rare2) {
            rare2 = b;
            rare2i = i;
        }
    }
    if Freqy::rank(rare1) > Freqy::MAX_RANK {
        Freqy::inert()
    } else {
        Freqy {
            inert: false,
            needle_len: needle.len(),
            rare1,
            rare1i,
            rare2,
            rare2i,
        }
    }
}
/// Find a position in `haystack` where the needle might start.
///
/// There are no false negatives: an actual occurrence is never skipped.
/// False positives are possible, so the caller must verify that the needle
/// really occurs at the returned position. `None` means the rarest byte
/// does not occur in the remaining haystack at all.
///
/// Only valid on a prefilter built with `Freqy::forward`.
pub fn find_candidate(
    &self,
    prestate: &mut PrefilterState,
    haystack: &[u8],
) -> Option<usize> {
    debug_assert!(!self.inert);
    let mut at = 0;
    while prestate.is_effective() {
        // Jump to the next occurrence of the (heuristically) rarest byte
        // using the fast byte search.
        let skipped = haystack[at..].find_byte(self.rare1)?;
        prestate.update(skipped);
        at += skipped;
        // A candidate needs room for the needle bytes before `rare1`, and
        // the second rarest byte must sit at its matching offset. The
        // `&&` keeps the subtraction from underflowing.
        let plausible = at >= self.rare1i
            && haystack.get(at - self.rare1i + self.rare2i)
                == Some(&self.rare2);
        if plausible {
            // That is all this filter can check; a match may start here.
            return Some(at - self.rare1i);
        }
        at += 1;
    }
    // Reaching this point means the skipping heuristic was judged
    // ineffective. False positives are allowed, so report the position
    // reached so far, aligned to the needle start.
    Some(at.saturating_sub(self.rare1i))
}
/// Look for a possible occurrence of needle, in reverse, starting from the
/// end of the given haystack. The position returned corresponds to the
/// position immediately after the end of the occurrence, if one exists.
/// The returned position never exceeds `haystack.len()`.
///
/// Callers may assume that this never returns false negatives (i.e., it
/// never misses an actual occurrence), but must check that the returned
/// position corresponds to a match. That is, it can return false
/// positives. `None` means the rarest byte does not occur in the remaining
/// haystack at all.
///
/// This should only be used when Freqy is constructed for reverse
/// searching.
pub fn rfind_candidate(
    &self,
    prestate: &mut PrefilterState,
    haystack: &[u8],
) -> Option<usize> {
    debug_assert!(!self.inert);
    let mut i = haystack.len();
    while prestate.is_effective() {
        // Use a fast vectorized implementation to skip to the next
        // occurrence of the rarest byte (heuristically chosen) in the
        // needle.
        i = match haystack[..i].rfind_byte(self.rare1) {
            None => return None,
            Some(found) => {
                prestate.update(i - found);
                found
            }
        };
        // If we can't align our first match with the haystack, then a
        // match is impossible.
        if i + self.rare1i + 1 > haystack.len() {
            continue;
        }
        // Align our rare2 byte with the haystack. A mismatch means that
        // a match is impossible.
        let aligned = match (i + self.rare1i).checked_sub(self.rare2i) {
            None => continue,
            Some(aligned) => aligned,
        };
        if haystack.get(aligned) != Some(&self.rare2) {
            continue;
        }
        // We've done what we can. There might be a match here.
        return Some(i + self.rare1i + 1);
    }
    // The only way we get here is if we believe our skipping heuristic
    // has become ineffective. We're allowed to return false positives,
    // so return the position at which we advanced to, aligned to the
    // haystack.
    //
    // The aligned end can lie past the end of the haystack: the loop may
    // give up right after the "cannot align" `continue` above, or never
    // run at all. No occurrence can end beyond the haystack, so clamping
    // keeps the no-false-negatives guarantee while ensuring callers never
    // index out of bounds with the result. This mirrors the
    // `saturating_sub` guard in `find_candidate`.
    Some((i + self.rare1i + 1).min(haystack.len()))
}
/// Return the heuristic frequency rank of the given byte. A lower rank
/// means the byte is believed to occur less frequently.
///
/// The rank is read from the pre-computed `BYTE_FREQUENCIES` table, indexed
/// by the byte value.
fn rank(b: u8) -> usize {
BYTE_FREQUENCIES[b as usize] as usize
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use ext_slice::B;

    /// One prefilter case: (needle, haystack, expected candidate).
    type Case = (&'static str, &'static str, Option<usize>);

    #[test]
    fn freqy_forward() {
        // N.B. Some needles are uppercase because that mostly guarantees a
        // usable freqy. Lowercase letters may be too common for freqy to
        // work.
        const CASES: &'static [Case] = &[
            ("BAR", "BARFOO", Some(0)),
            ("BAR", "FOOBAR", Some(3)),
            ("zyzy", "zyzz", Some(0)),
            ("zyzy", "zzzy", Some(2)),
            ("zyzy", "zazb", None),
            ("yzyz", "yzyy", Some(0)),
            ("yzyz", "yyyz", Some(2)),
            ("yzyz", "yayb", None),
        ];
        for &(needle, haystack, expected) in CASES {
            let s = Freqy::forward(B(needle));
            let mut pre = s.prefilter_state();
            assert_eq!(expected, s.find_candidate(&mut pre, B(haystack)));
        }
    }

    #[test]
    fn freqy_reverse() {
        // N.B. Some needles are uppercase because that mostly guarantees a
        // usable freqy. Lowercase letters may be too common for freqy to
        // work.
        const CASES: &'static [Case] = &[
            ("BAR", "BARFOO", Some(3)),
            ("BAR", "FOOBAR", Some(6)),
            ("zyzy", "zyzz", Some(2)),
            ("zyzy", "zzzy", Some(4)),
            ("zyzy", "zazb", None),
            ("yzyz", "yzyy", Some(2)),
            ("yzyz", "yyyz", Some(4)),
            ("yzyz", "yayb", None),
        ];
        for &(needle, haystack, expected) in CASES {
            let s = Freqy::reverse(B(needle));
            let mut pre = s.prefilter_state();
            assert_eq!(expected, s.rfind_candidate(&mut pre, B(haystack)));
        }
    }
}

View File

@ -0,0 +1,225 @@
use search::twoway::TwoWay;
/// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple.
///
/// `expected_fwd` is the result expected from the forward search run by
/// `run_search_tests_fwd`, and `expected_rev` the result expected from the
/// reverse search run by `run_search_tests_rev`. `None` means the needle does
/// not occur in the haystack.
type SearchTest = (&'static str, &'static str, Option<usize>, Option<usize>);
/// Hand-written substring search cases shared by the forward and reverse unit
/// tests.
const SEARCH_TESTS: &'static [SearchTest] = &[
// Empty needle: forward reports 0, reverse reports the haystack length.
("", "", Some(0), Some(0)),
("", "a", Some(0), Some(1)),
("", "ab", Some(0), Some(2)),
("", "abc", Some(0), Some(3)),
// Single-byte needle.
("a", "", None, None),
("a", "a", Some(0), Some(0)),
("a", "aa", Some(0), Some(1)),
("a", "ba", Some(1), Some(1)),
("a", "bba", Some(2), Some(2)),
("a", "bbba", Some(3), Some(3)),
("a", "bbbab", Some(3), Some(3)),
("a", "bbbabb", Some(3), Some(3)),
("a", "bbbabbb", Some(3), Some(3)),
("a", "bbbbbb", None, None),
// Two-byte needle.
("ab", "", None, None),
("ab", "a", None, None),
("ab", "b", None, None),
("ab", "ab", Some(0), Some(0)),
("ab", "aab", Some(1), Some(1)),
("ab", "aaab", Some(2), Some(2)),
("ab", "abaab", Some(0), Some(3)),
("ab", "baaab", Some(3), Some(3)),
("ab", "acb", None, None),
("ab", "abba", Some(0), Some(0)),
// Three-byte needle.
("abc", "ab", None, None),
("abc", "abc", Some(0), Some(0)),
("abc", "abcz", Some(0), Some(0)),
("abc", "abczz", Some(0), Some(0)),
("abc", "zabc", Some(1), Some(1)),
("abc", "zzabc", Some(2), Some(2)),
("abc", "azbc", None, None),
("abc", "abzc", None, None),
// Longer needle at the very start and very end of a long haystack.
("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)),
("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)),
// Failures caught by quickcheck.
("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)),
("\u{0}\u{1e}", "\u{1e}\u{0}", None, None),
];
/// Runs every `SEARCH_TESTS` case through a forward `TwoWay` search.
#[test]
fn unit_twoway_fwd() {
    let search =
        |needle: &[u8], haystack: &[u8]| TwoWay::forward(needle).find(haystack);
    run_search_tests_fwd("TwoWay", search);
}
/// Runs every `SEARCH_TESTS` case through a reverse `TwoWay` search.
#[test]
fn unit_twoway_rev() {
    let search =
        |needle: &[u8], haystack: &[u8]| TwoWay::reverse(needle).rfind(haystack);
    run_search_tests_rev("TwoWay", search);
}
/// Drive the shared substring search cases in the forward direction.
///
/// `name` identifies the searcher in failure messages. `search` takes a
/// needle and a haystack and must return the start of the first occurrence of
/// the needle in the haystack, or `None` when there is none.
fn run_search_tests_fwd(
    name: &str,
    mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
) {
    for case in SEARCH_TESTS {
        let needle = case.0.as_bytes();
        let haystack = case.1.as_bytes();
        let expected = case.2;
        assert_eq!(
            expected,
            search(needle, haystack),
            "{}: needle: {:?}, haystack: {:?}, expected: {:?}",
            name,
            needle,
            haystack,
            expected
        );
    }
}
/// Drive the shared substring search cases in the reverse direction.
///
/// `name` identifies the searcher in failure messages. `search` takes a
/// needle and a haystack and must return the start of the last occurrence of
/// the needle in the haystack, or `None` when there is none.
fn run_search_tests_rev(
    name: &str,
    mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
) {
    for case in SEARCH_TESTS {
        let needle = case.0.as_bytes();
        let haystack = case.1.as_bytes();
        let expected = case.3;
        assert_eq!(
            expected,
            search(needle, haystack),
            "{}: needle: {:?}, haystack: {:?}, expected: {:?}",
            name,
            needle,
            haystack,
            expected
        );
    }
}
// Property tests for TwoWay. Each property hands randomly generated byte
// strings to one of the `prop_*` helpers below together with a closure that
// builds a fresh TwoWay searcher per call. The boolean passed first selects
// the direction: `false` for forward search (`find`), `true` for reverse
// search (`rfind`).
quickcheck! {
fn qc_twoway_fwd_prefix_is_substring(bs: Vec<u8>) -> bool {
prop_prefix_is_substring(false, &bs, |n, h| TwoWay::forward(n).find(h))
}
fn qc_twoway_fwd_suffix_is_substring(bs: Vec<u8>) -> bool {
prop_suffix_is_substring(false, &bs, |n, h| TwoWay::forward(n).find(h))
}
fn qc_twoway_rev_prefix_is_substring(bs: Vec<u8>) -> bool {
prop_prefix_is_substring(true, &bs, |n, h| TwoWay::reverse(n).rfind(h))
}
fn qc_twoway_rev_suffix_is_substring(bs: Vec<u8>) -> bool {
prop_suffix_is_substring(true, &bs, |n, h| TwoWay::reverse(n).rfind(h))
}
fn qc_twoway_fwd_matches_naive(
needle: Vec<u8>,
haystack: Vec<u8>
) -> bool {
prop_matches_naive(
false,
&needle,
&haystack,
|n, h| TwoWay::forward(n).find(h),
)
}
fn qc_twoway_rev_matches_naive(
needle: Vec<u8>,
haystack: Vec<u8>
) -> bool {
prop_matches_naive(
true,
&needle,
&haystack,
|n, h| TwoWay::reverse(n).rfind(h),
)
}
}
/// Check that every prefix of the given byte string is a substring.
///
/// For each prefix of `bs`, from the empty prefix up to and including `bs`
/// itself, `search(prefix, bs)` must agree with the naive search in the
/// chosen direction (`reverse == true` compares against `naive_rfind`,
/// otherwise against `naive_find`). A disagreement panics via `assert_eq!`;
/// otherwise `true` is returned. An empty `bs` is accepted without searching.
fn prop_prefix_is_substring(
    reverse: bool,
    bs: &[u8],
    mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
) -> bool {
    if bs.is_empty() {
        return true;
    }
    // Inclusive upper bound: `&bs[..bs.len()]` is the whole string, which is
    // also a prefix. The previous bound of `bs.len() - 1` (exclusive) never
    // exercised the two longest prefixes.
    for i in 0..=bs.len() {
        let prefix = &bs[..i];
        if reverse {
            assert_eq!(naive_rfind(prefix, bs), search(prefix, bs));
        } else {
            assert_eq!(naive_find(prefix, bs), search(prefix, bs));
        }
    }
    true
}
/// Check that every suffix of the given byte string is a substring.
///
/// For each suffix of `bs`, from `bs` itself down to and including the empty
/// suffix, `search(suffix, bs)` must agree with the naive search in the
/// chosen direction (`reverse == true` compares against `naive_rfind`,
/// otherwise against `naive_find`). A disagreement panics via `assert_eq!`;
/// otherwise `true` is returned. An empty `bs` is accepted without searching.
fn prop_suffix_is_substring(
    reverse: bool,
    bs: &[u8],
    mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
) -> bool {
    if bs.is_empty() {
        return true;
    }
    // Inclusive upper bound: `&bs[bs.len()..]` is the empty suffix. The
    // previous bound of `bs.len() - 1` (exclusive) never exercised the
    // one-byte suffix or the empty suffix.
    for i in 0..=bs.len() {
        let suffix = &bs[i..];
        if reverse {
            assert_eq!(naive_rfind(suffix, bs), search(suffix, bs));
        } else {
            assert_eq!(naive_find(suffix, bs), search(suffix, bs));
        }
    }
    true
}
/// Compare the given search algorithm against the naive reference search.
///
/// Returns `true` when `search(needle, haystack)` equals the naive result in
/// the chosen direction: `naive_rfind` when `reverse` is set, `naive_find`
/// otherwise.
fn prop_matches_naive(
    reverse: bool,
    needle: &[u8],
    haystack: &[u8],
    mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
) -> bool {
    let expected = if reverse {
        naive_rfind(needle, haystack)
    } else {
        naive_find(needle, haystack)
    };
    expected == search(needle, haystack)
}
/// Reference forward search: the start of the first occurrence of `needle`
/// in `haystack`, or `None` if there is none.
///
/// An empty needle matches at position 0.
fn naive_find(needle: &[u8], haystack: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    // `windows` yields nothing when the haystack is shorter than the needle,
    // which covers the "needle too long" case.
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
/// Reference reverse search: the start of the last occurrence of `needle` in
/// `haystack`, or `None` if there is none.
///
/// An empty needle matches at `haystack.len()`.
fn naive_rfind(needle: &[u8], haystack: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(haystack.len());
    }
    // `windows` yields nothing when the haystack is shorter than the needle,
    // which covers the "needle too long" case.
    haystack
        .windows(needle.len())
        .rposition(|window| window == needle)
}

View File

@ -0,0 +1,871 @@
use core::cmp;
use cow::CowBytes;
use ext_slice::ByteSlice;
use search::prefilter::{Freqy, PrefilterState};
/// An implementation of the TwoWay substring search algorithm, with heuristics
/// for accelerating search based on frequency analysis.
///
/// This searcher supports forward and reverse search, although not
/// simultaneously. It runs in O(n + m) time and O(1) space, where
/// `n ~ len(needle)` and `m ~ len(haystack)`.
///
/// The implementation here roughly matches that which was developed by
/// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The
/// only change in this implementation is the use of zero-based indices and
/// the addition of heuristics for a fast skip loop. That is, this will detect
/// bytes that are believed to be rare in the needle and use fast vectorized
/// instructions to find their occurrences quickly. The Two-Way algorithm is
/// then used to confirm whether a match at that location occurred.
///
/// The heuristic for fast skipping is automatically shut off if it's
/// detected to be ineffective at search time. Generally, this only occurs in
/// pathological cases. But this is generally necessary in order to preserve
/// a `O(n + m)` time bound.
///
/// The code below is fairly complex and not obviously correct at all. It's
/// likely necessary to read the Two-Way paper cited above in order to fully
/// grok this code.
///
/// A searcher is built for exactly one direction: use `TwoWay::forward` with
/// `find`/`find_with`, and `TwoWay::reverse` with `rfind`/`rfind_with`.
#[derive(Clone, Debug)]
pub struct TwoWay<'b> {
/// The needle that we're looking for.
needle: CowBytes<'b>,
/// An implementation of a fast skip loop based on hard-coded frequency
/// data. This is only used when conditions are deemed favorable.
freqy: Freqy,
/// A critical position in needle. Specifically, this position corresponds
/// to the beginning of either the minimal or maximal suffix in needle.
/// (N.B. See SuffixType below for why "minimal" isn't quite the correct
/// word here.)
///
/// This is the position at which every search begins. Namely, search
/// starts by scanning text to the right of this position, and only if
/// there's a match does the text to the left of this position get scanned.
critical_pos: usize,
/// The amount we shift by in the Two-Way search algorithm. This
/// corresponds to the "small period" and "large period" cases.
shift: Shift,
}
impl<'b> TwoWay<'b> {
/// Build a Two-Way searcher that scans any haystack front to back.
///
/// For a non-empty needle, the critical position is the start of whichever
/// of the minimal and maximal suffixes begins further right, and that
/// suffix's period serves as the lower bound used to compute the shift. An
/// empty needle gets a critical position of 0 and a zero "large" shift.
pub fn forward(needle: &'b [u8]) -> TwoWay<'b> {
    let freqy = Freqy::forward(needle);
    let (critical_pos, shift) = if needle.is_empty() {
        (0, Shift::Large { shift: 0 })
    } else {
        let minimal = Suffix::forward(needle, SuffixKind::Minimal);
        let maximal = Suffix::forward(needle, SuffixKind::Maximal);
        // Prefer the right-most starting position; ties go to the maximal
        // suffix.
        let chosen = if minimal.pos > maximal.pos { minimal } else { maximal };
        let shift = Shift::forward(needle, chosen.period, chosen.pos);
        (chosen.pos, shift)
    };
    TwoWay { needle: CowBytes::new(needle), freqy, critical_pos, shift }
}
/// Build a Two-Way searcher that scans any haystack back to front.
///
/// For a non-empty needle, the critical position is taken from whichever of
/// the minimal and maximal reverse suffixes has the smaller position, and
/// that suffix's period serves as the lower bound used to compute the
/// shift. An empty needle gets a critical position of 0 and a zero "large"
/// shift.
pub fn reverse(needle: &'b [u8]) -> TwoWay<'b> {
    let freqy = Freqy::reverse(needle);
    let (critical_pos, shift) = if needle.is_empty() {
        (0, Shift::Large { shift: 0 })
    } else {
        let minimal = Suffix::reverse(needle, SuffixKind::Minimal);
        let maximal = Suffix::reverse(needle, SuffixKind::Maximal);
        // Prefer the left-most position; ties go to the maximal suffix.
        let chosen = if minimal.pos < maximal.pos { minimal } else { maximal };
        let shift = Shift::reverse(needle, chosen.period, chosen.pos);
        (chosen.pos, shift)
    };
    TwoWay { needle: CowBytes::new(needle), freqy, critical_pos, shift }
}
/// Return a fresh prefilter state that can be used with this searcher.
/// A prefilter state is used to track the effectiveness of a searcher's
/// prefilter for speeding up searches. Therefore, the prefilter state
/// should generally be reused on subsequent searches (such as in an
/// iterator). For searches on a different haystack, then a new prefilter
/// state should be used.
///
/// This always initializes a valid prefilter state even if this searcher
/// does not have a prefilter enabled.
///
/// The state is obtained from this searcher's `Freqy` prefilter.
pub fn prefilter_state(&self) -> PrefilterState {
self.freqy.prefilter_state()
}
/// Return the needle used by this searcher, as a byte slice borrowed from
/// the searcher.
pub fn needle(&self) -> &[u8] {
self.needle.as_slice()
}
/// Convert this searcher into an owned version, where the needle is
/// copied if it isn't already owned.
///
/// The prefilter, critical position and shift are moved over unchanged.
/// Only available when the `std` feature is enabled.
#[cfg(feature = "std")]
pub fn into_owned(self) -> TwoWay<'static> {
TwoWay {
needle: self.needle.into_owned(),
freqy: self.freqy,
critical_pos: self.critical_pos,
shift: self.shift,
}
}
/// Find the position of the first occurrence of this searcher's needle in
/// the given haystack. If one does not exist, then return None.
///
/// This will automatically initialize prefilter state. This should only
/// be used for one-off searches.
///
/// Equivalent to calling `find_with` with a state freshly obtained from
/// `prefilter_state`.
pub fn find(&self, haystack: &[u8]) -> Option<usize> {
self.find_with(&mut self.prefilter_state(), haystack)
}
/// Find the position of the last occurrence of this searcher's needle
/// in the given haystack. If one does not exist, then return None.
///
/// This will automatically initialize prefilter state. This should only
/// be used for one-off searches.
///
/// Equivalent to calling `rfind_with` with a state freshly obtained from
/// `prefilter_state`.
pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
self.rfind_with(&mut self.prefilter_state(), haystack)
}
/// Locate the first occurrence of this searcher's needle in `haystack`,
/// returning its starting position, or `None` when it does not occur.
///
/// The caller supplies the prefilter state, which makes this the variant to
/// use when one searcher runs many times, such as in an iterator.
///
/// An empty needle matches at 0, a needle longer than the haystack never
/// matches, and a one-byte needle is handled by a plain byte search.
pub fn find_with(
    &self,
    prestate: &mut PrefilterState,
    haystack: &[u8],
) -> Option<usize> {
    let nlen = self.needle.len();
    if nlen == 0 {
        return Some(0);
    }
    if haystack.len() < nlen {
        return None;
    }
    if nlen == 1 {
        return haystack.find_byte(self.needle[0]);
    }
    match self.shift {
        Shift::Large { shift } => self.find_large(prestate, haystack, shift),
        Shift::Small { period } => {
            self.find_small(prestate, haystack, period)
        }
    }
}
/// Locate the last occurrence of this searcher's needle in `haystack`,
/// returning its starting position, or `None` when it does not occur.
///
/// The caller supplies the prefilter state, which makes this the variant to
/// use when one searcher runs many times, such as in an iterator.
///
/// An empty needle matches at `haystack.len()`, a needle longer than the
/// haystack never matches, and a one-byte needle is handled by a plain
/// reverse byte search.
pub fn rfind_with(
    &self,
    prestate: &mut PrefilterState,
    haystack: &[u8],
) -> Option<usize> {
    let nlen = self.needle.len();
    if nlen == 0 {
        return Some(haystack.len());
    }
    if haystack.len() < nlen {
        return None;
    }
    if nlen == 1 {
        return haystack.rfind_byte(self.needle[0]);
    }
    match self.shift {
        Shift::Large { shift } => self.rfind_large(prestate, haystack, shift),
        Shift::Small { period } => {
            self.rfind_small(prestate, haystack, period)
        }
    }
}
// Below is the actual implementation of TwoWay searching, including both
// forwards and backwards searching. Each forward and reverse search has
// two fairly similar implementations, each handling the small and large
// period cases, for a total of 4 different search routines.
//
// On top of that, each search implementation can be accelerated by a
// Freqy prefilter, but it is not always enabled. To avoid its overhead
// when it's disabled, we explicitly inline each search implementation based
// on whether Freqy will be used or not. This brings us up to a total of
// 8 monomorphized versions of the search code.
/// Forward search entry point for the small period case.
///
/// Checks once whether the prefilter is currently effective and calls
/// `find_small_imp` with a literal `true` or `false` for its `prefilter`
/// argument. The two call sites are kept separate on purpose: the callee is
/// `#[inline(always)]`, so each call site gets its own copy of the search
/// loop for a known value of the flag.
#[inline(never)]
fn find_small(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
period: usize,
) -> Option<usize> {
if prestate.is_effective() {
self.find_small_imp(prestate, true, haystack, period)
} else {
self.find_small_imp(prestate, false, haystack, period)
}
}
/// Forward Two-Way search loop for the small period case.
///
/// `prefilter` says whether the Freqy prefilter may be consulted at all; even
/// when it is `true`, the prefilter is skipped once `prestate` reports it as
/// ineffective. `period` is the shift applied after the right part of the
/// needle matched but the left part did not.
///
/// Returns the start of the first occurrence of the needle in `haystack`, or
/// `None` if there is none.
#[inline(always)]
fn find_small_imp(
&self,
prestate: &mut PrefilterState,
prefilter: bool,
haystack: &[u8],
period: usize,
) -> Option<usize> {
let needle = self.needle.as_slice();
// `pos` is the haystack offset at which the needle is currently aligned.
let mut pos = 0;
// NOTE(review): `shift` looks like the "memory" of the Two-Way paper, i.e.
// the length of the needle prefix already known to match at `pos` after a
// shift by `period` -- confirm against the paper.
let mut shift = 0;
while pos + needle.len() <= haystack.len() {
let mut i = cmp::max(self.critical_pos, shift);
if prefilter && prestate.is_effective() {
match self.freqy.find_candidate(prestate, &haystack[pos..]) {
None => return None,
Some(found) => {
// The prefilter moved the window, so anything remembered about
// the previous alignment no longer applies.
shift = 0;
i = self.critical_pos;
pos += found;
if pos + needle.len() > haystack.len() {
return None;
}
}
}
}
// Scan the right part of the needle, left to right.
while i < needle.len() && needle[i] == haystack[pos + i] {
i += 1;
}
if i < needle.len() {
// Mismatch in the right part: advance past the mismatch position.
pos += i - self.critical_pos + 1;
shift = 0;
} else {
// The right part matched; scan the left part, right to left, stopping
// at the prefix recorded in `shift`.
let mut j = self.critical_pos;
while j > shift && needle[j] == haystack[pos + j] {
j -= 1;
}
if j <= shift && needle[shift] == haystack[pos + shift] {
return Some(pos);
}
pos += period;
shift = needle.len() - period;
}
}
None
}
/// Forward search entry point for the large period case.
///
/// Checks once whether the prefilter is currently effective and calls
/// `find_large_imp` with a literal `true` or `false` for its `prefilter`
/// argument. The two call sites are kept separate on purpose: the callee is
/// `#[inline(always)]`, so each call site gets its own copy of the search
/// loop for a known value of the flag.
#[inline(never)]
fn find_large(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
shift: usize,
) -> Option<usize> {
if prestate.is_effective() {
self.find_large_imp(prestate, true, haystack, shift)
} else {
self.find_large_imp(prestate, false, haystack, shift)
}
}
/// Forward Two-Way search loop for the large period case.
///
/// `prefilter` says whether the Freqy prefilter may be consulted at all; even
/// when it is `true`, the prefilter is skipped once `prestate` reports it as
/// ineffective. `shift` is the fixed amount the window advances after the
/// right part of the needle matched but the left part did not.
///
/// Returns the start of the first occurrence of the needle in `haystack`, or
/// `None` if there is none.
#[inline(always)]
fn find_large_imp(
&self,
prestate: &mut PrefilterState,
prefilter: bool,
haystack: &[u8],
shift: usize,
) -> Option<usize> {
let needle = self.needle.as_slice();
// `pos` is the haystack offset at which the needle is currently aligned.
let mut pos = 0;
while pos + needle.len() <= haystack.len() {
let mut i = self.critical_pos;
if prefilter && prestate.is_effective() {
match self.freqy.find_candidate(prestate, &haystack[pos..]) {
None => return None,
Some(found) => {
pos += found;
if pos + needle.len() > haystack.len() {
return None;
}
}
}
}
// Scan the right part of the needle, left to right.
while i < needle.len() && needle[i] == haystack[pos + i] {
i += 1;
}
if i < needle.len() {
// Mismatch in the right part: advance past the mismatch position.
pos += i - self.critical_pos + 1;
} else {
// The right part matched; scan the left part, right to left, down to
// the first byte of the needle.
let mut j = self.critical_pos;
while j > 0 && needle[j] == haystack[pos + j] {
j -= 1;
}
if j == 0 && needle[0] == haystack[pos] {
return Some(pos);
}
pos += shift;
}
}
None
}
/// Reverse search entry point for the small period case.
///
/// Checks once whether the prefilter is currently effective and calls
/// `rfind_small_imp` with a literal `true` or `false` for its `prefilter`
/// argument. The two call sites are kept separate on purpose: the callee is
/// `#[inline(always)]`, so each call site gets its own copy of the search
/// loop for a known value of the flag.
#[inline(never)]
fn rfind_small(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
period: usize,
) -> Option<usize> {
if prestate.is_effective() {
self.rfind_small_imp(prestate, true, haystack, period)
} else {
self.rfind_small_imp(prestate, false, haystack, period)
}
}
/// Reverse Two-Way search loop for the small period case.
///
/// `prefilter` says whether the Freqy prefilter may be consulted at all; even
/// when it is `true`, the prefilter is skipped once `prestate` reports it as
/// ineffective. `period` is the amount the window moves left after the left
/// part of the needle matched but the right part did not.
///
/// Returns the start of the last occurrence of the needle in `haystack`, or
/// `None` if there is none.
#[inline(always)]
fn rfind_small_imp(
&self,
prestate: &mut PrefilterState,
prefilter: bool,
haystack: &[u8],
period: usize,
) -> Option<usize> {
let needle = &*self.needle;
let nlen = needle.len();
// `pos` is the haystack offset just past the end of the current needle
// alignment, so the alignment covers `haystack[pos - nlen..pos]`.
let mut pos = haystack.len();
// NOTE(review): `shift` looks like the reverse counterpart of the Two-Way
// "memory": needle bytes from index `shift` onward are presumably already
// known to match -- confirm against the paper.
let mut shift = nlen;
while pos >= nlen {
let mut i = cmp::min(self.critical_pos, shift);
if prefilter && prestate.is_effective() {
match self.freqy.rfind_candidate(prestate, &haystack[..pos]) {
None => return None,
Some(found) => {
// The prefilter moved the window, so anything remembered about
// the previous alignment no longer applies.
shift = nlen;
i = self.critical_pos;
pos = found;
if pos < nlen {
return None;
}
}
}
}
// Scan the left part of the needle, right to left.
while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
i -= 1;
}
if i > 0 || needle[0] != haystack[pos - nlen] {
// Mismatch in the left part: move left past the mismatch position.
pos -= self.critical_pos - i + 1;
shift = nlen;
} else {
// The left part matched; scan the right part, left to right, up to
// the suffix recorded in `shift`.
let mut j = self.critical_pos;
while j < shift && needle[j] == haystack[pos - nlen + j] {
j += 1;
}
if j == shift {
return Some(pos - nlen);
}
pos -= period;
shift = period;
}
}
None
}
/// Reverse search entry point for the large period case.
///
/// Checks once whether the prefilter is currently effective and calls
/// `rfind_large_imp` with a literal `true` or `false` for its `prefilter`
/// argument. The two call sites are kept separate on purpose: the callee is
/// `#[inline(always)]`, so each call site gets its own copy of the search
/// loop for a known value of the flag.
#[inline(never)]
fn rfind_large(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
shift: usize,
) -> Option<usize> {
if prestate.is_effective() {
self.rfind_large_imp(prestate, true, haystack, shift)
} else {
self.rfind_large_imp(prestate, false, haystack, shift)
}
}
/// Reverse Two-Way search loop for the large period case.
///
/// `prefilter` says whether the Freqy prefilter may be consulted at all; even
/// when it is `true`, the prefilter is skipped once `prestate` reports it as
/// ineffective. `shift` is the fixed amount the window moves left after the
/// left part of the needle matched but the right part did not.
///
/// Returns the start of the last occurrence of the needle in `haystack`, or
/// `None` if there is none.
#[inline(always)]
fn rfind_large_imp(
&self,
prestate: &mut PrefilterState,
prefilter: bool,
haystack: &[u8],
shift: usize,
) -> Option<usize> {
let needle = &*self.needle;
let nlen = needle.len();
// `pos` is the haystack offset just past the end of the current needle
// alignment, so the alignment covers `haystack[pos - nlen..pos]`.
let mut pos = haystack.len();
while pos >= nlen {
if prefilter && prestate.is_effective() {
match self.freqy.rfind_candidate(prestate, &haystack[..pos]) {
None => return None,
Some(found) => {
pos = found;
if pos < nlen {
return None;
}
}
}
}
// Scan the left part of the needle, right to left.
let mut i = self.critical_pos;
while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
i -= 1;
}
if i > 0 || needle[0] != haystack[pos - nlen] {
// Mismatch in the left part: move left past the mismatch position.
pos -= self.critical_pos - i + 1;
} else {
// The left part matched; scan the right part, left to right, to the
// end of the needle.
let mut j = self.critical_pos;
while j < nlen && needle[j] == haystack[pos - nlen + j] {
j += 1;
}
if j == nlen {
return Some(pos - nlen);
}
pos -= shift;
}
}
None
}
}
/// A representation of the amount we're allowed to shift by during Two-Way
/// search.
///
/// When computing a critical factorization of the needle, we find the position
/// of the critical factorization by finding the needle's maximal (or minimal)
/// suffix, along with the period of that suffix. It turns out that the period
/// of that suffix is a lower bound on the period of the needle itself.
///
/// This lower bound is equivalent to the actual period of the needle in
/// some cases. To describe that case, we denote the needle as `x` where
/// `x = uv` and `v` is the lexicographic maximal suffix of `x`. The lower
/// bound given here is always the period of `v`, which is `<= period(x)`. The
/// case where `period(v) == period(x)` occurs when `len(u) < (len(x) / 2)` and
/// where `u` is a suffix of `v[0..period(v)]`.
///
/// This case is important because the search algorithm for when the
/// periods are equivalent is slightly different than the search algorithm
/// for when the periods are not equivalent. In particular, when they aren't
/// equivalent, we know that the period of the needle is no less than half its
/// length. In this case, we shift by an amount less than or equal to the
/// period of the needle (determined by the maximum length of the components
/// of the critical factorization of `x`, i.e., `max(len(u), len(v))`).
///
/// The above two cases are represented by the variants below. Each entails
/// a different instantiation of the Two-Way search algorithm.
///
/// N.B. If we could find a way to compute the exact period in all cases,
/// then we could collapse this case analysis and simplify the algorithm. The
/// Two-Way paper suggests this is possible, but more reading is required to
/// grok why the authors didn't pursue that path.
#[derive(Clone, Debug)]
enum Shift {
/// The period lower bound is the needle's actual period; `period` holds it.
Small { period: usize },
/// The exact period is not known; `shift` holds the amount to shift by
/// instead, i.e. `max(len(u), len(v))`.
Large { shift: usize },
}
impl Shift {
    /// Determine the forward-search shift for `needle`.
    ///
    /// `critical_pos` splits the needle into `u = needle[..critical_pos]` and
    /// `v = needle[critical_pos..]`, and `period_lower_bound` is the period of
    /// the suffix that was chosen to produce that split (the right-most of the
    /// minimal and maximal lexicographic suffixes).
    ///
    /// `Shift::Small` is returned only when `u` is shorter than half of the
    /// needle and `u` is a suffix of `v[..period_lower_bound]`. In every other
    /// case `Shift::Large` is returned, carrying `max(len(u), len(v))`.
    fn forward(
        needle: &[u8],
        period_lower_bound: usize,
        critical_pos: usize,
    ) -> Shift {
        let needle_len = needle.len();
        // Computed up front so that the fallback is ready for both of the
        // "large" exits below.
        let fallback = cmp::max(critical_pos, needle_len - critical_pos);
        if critical_pos * 2 < needle_len {
            let (prefix, rest) = needle.split_at(critical_pos);
            if rest[..period_lower_bound].ends_with(prefix) {
                return Shift::Small { period: period_lower_bound };
            }
        }
        Shift::Large { shift: fallback }
    }

    /// Determine the reverse-search shift for `needle`.
    ///
    /// This mirrors `forward`: here `critical_pos` splits the needle into
    /// `v = needle[..critical_pos]` and `u = needle[critical_pos..]`, and
    /// `period_lower_bound` is the period of the chosen reverse suffix (the
    /// left-most of the minimal and maximal candidates).
    ///
    /// `Shift::Small` is returned only when `u` is shorter than half of the
    /// needle and `u` is a prefix of the last `period_lower_bound` bytes of
    /// `v`. Otherwise `Shift::Large` carries `max(len(v), len(u))`.
    fn reverse(
        needle: &[u8],
        period_lower_bound: usize,
        critical_pos: usize,
    ) -> Shift {
        let needle_len = needle.len();
        let fallback = cmp::max(critical_pos, needle_len - critical_pos);
        if (needle_len - critical_pos) * 2 < needle_len {
            let (head, tail) = needle.split_at(critical_pos);
            let window_start = head.len() - period_lower_bound;
            if head[window_start..].starts_with(tail) {
                return Shift::Small { period: period_lower_bound };
            }
        }
        Shift::Large { shift: fallback }
    }
}
/// A suffix extracted from a needle along with its period.
#[derive(Debug)]
struct Suffix {
/// The starting position of this suffix.
///
/// If this is a forward suffix, then `&bytes[pos..]` can be used. If this
/// is a reverse suffix, then `&bytes[..pos]` can be used. That is, for
/// forward suffixes, this is an inclusive starting position, whereas for
/// reverse suffixes, this is an exclusive ending position.
pos: usize,
/// The period of this suffix.
///
/// Note that this is NOT necessarily the period of the string from which
/// this suffix comes. (It is always less than or equal to the period
/// of the original string.)
period: usize,
}
impl Suffix {
    /// Scan `needle` left to right and return its maximal (or minimal, as
    /// selected by `kind`) suffix together with that suffix's period.
    ///
    /// The returned `pos` is an inclusive start, i.e., the suffix is
    /// `&needle[pos..]`. The needle must be non-empty.
    fn forward(needle: &[u8], kind: SuffixKind) -> Suffix {
        debug_assert!(!needle.is_empty());

        let len = needle.len();
        // The best suffix found so far and its period.
        let mut best_pos = 0;
        let mut best_period = 1;
        // Start of the suffix currently challenging the best one.
        let mut rival = 1;
        // Number of leading bytes on which the best suffix and its rival
        // are already known to agree.
        let mut matched = 0;

        while rival + matched < len {
            let ours = needle[best_pos + matched];
            let theirs = needle[rival + matched];
            match kind.cmp(ours, theirs) {
                // Bytes agree and a full period has not been covered yet:
                // keep comparing further along.
                SuffixOrdering::Push if matched + 1 != best_period => {
                    matched += 1;
                }
                // Bytes agree across a whole period: the rival merely
                // repeats the best suffix, so hop over one period.
                SuffixOrdering::Push => {
                    rival += best_period;
                    matched = 0;
                }
                // The rival loses. Everything compared so far is ruled out,
                // and the distance to the next rival becomes the period.
                SuffixOrdering::Skip => {
                    rival += matched + 1;
                    matched = 0;
                    best_period = rival - best_pos;
                }
                // The rival wins and becomes the new best suffix.
                SuffixOrdering::Accept => {
                    best_pos = rival;
                    best_period = 1;
                    rival += 1;
                    matched = 0;
                }
            }
        }
        Suffix { pos: best_pos, period: best_period }
    }

    /// Scan `needle` right to left and return its maximal (or minimal, as
    /// selected by `kind`) reverse suffix together with its period.
    ///
    /// This is the mirror image of `forward`. The returned `pos` is an
    /// exclusive end, i.e., the suffix is `&needle[..pos]`. The needle must
    /// be non-empty.
    fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix {
        debug_assert!(!needle.is_empty());

        let mut best_pos = needle.len();
        let mut best_period = 1;
        // A single byte has exactly one candidate, which is the whole needle.
        if needle.len() == 1 {
            return Suffix { pos: best_pos, period: best_period };
        }

        // Exclusive end of the suffix currently challenging the best one.
        let mut rival = needle.len() - 1;
        let mut matched = 0;

        while matched < rival {
            let ours = needle[best_pos - matched - 1];
            let theirs = needle[rival - matched - 1];
            match kind.cmp(ours, theirs) {
                SuffixOrdering::Push if matched + 1 != best_period => {
                    matched += 1;
                }
                SuffixOrdering::Push => {
                    rival -= best_period;
                    matched = 0;
                }
                SuffixOrdering::Skip => {
                    rival -= matched + 1;
                    matched = 0;
                    best_period = best_pos - rival;
                }
                SuffixOrdering::Accept => {
                    best_pos = rival;
                    best_period = 1;
                    rival -= 1;
                    matched = 0;
                }
            }
        }
        Suffix { pos: best_pos, period: best_period }
    }
}
/// The kind of suffix to extract.
///
/// The kind selects which direction of byte comparison counts as "better"
/// when `cmp` weighs a candidate suffix against the current one.
#[derive(Clone, Copy, Debug)]
enum SuffixKind {
/// Extract the smallest lexicographic suffix from a string.
///
/// Technically, this doesn't actually pick the smallest lexicographic
/// suffix. e.g., Given the choice between `a` and `aa`, this will choose
/// the latter over the former, even though `a < aa`. The reasoning for
/// this isn't clear from the paper, but it still smells like a minimal
/// suffix.
Minimal,
/// Extract the largest lexicographic suffix from a string.
///
/// Unlike `Minimal`, this really does pick the maximum suffix. e.g., Given
/// the choice between `z` and `zz`, this will choose the latter over the
/// former.
Maximal,
}
/// The result of comparing corresponding bytes between two suffixes.
///
/// Values of this type are produced by `SuffixKind::cmp` and drive the three
/// branches of the loops in `Suffix::forward` and `Suffix::reverse`.
#[derive(Clone, Copy, Debug)]
enum SuffixOrdering {
/// This occurs when the given candidate byte indicates that the candidate
/// suffix is better than the current maximal (or minimal) suffix. That is,
/// the current candidate suffix should supplant the current maximal (or
/// minimal) suffix.
Accept,
/// This occurs when the given candidate byte excludes the candidate suffix
/// from being better than the current maximal (or minimal) suffix. That
/// is, the current candidate suffix should be dropped and the next one
/// should be considered.
Skip,
/// This occurs when no decision to accept or skip the candidate suffix
/// can be made, e.g., when corresponding bytes are equivalent. In this
/// case, the next corresponding bytes should be compared.
Push,
}
impl SuffixKind {
/// Returns true if and only if the given candidate byte indicates that
/// it should replace the current suffix as the maximal (or minimal)
/// suffix.
fn cmp(self, current: u8, candidate: u8) -> SuffixOrdering {
use self::SuffixOrdering::*;
match self {
SuffixKind::Minimal if candidate < current => Accept,
SuffixKind::Minimal if candidate > current => Skip,
SuffixKind::Minimal => Push,
SuffixKind::Maximal if candidate > current => Accept,
SuffixKind::Maximal if candidate < current => Skip,
SuffixKind::Maximal => Push,
}
}
}
// N.B. There are more holistic tests in src/search/tests.rs.
//
// The tests in this module only exercise suffix extraction (`Suffix::forward`
// and `Suffix::reverse`), not the search routines themselves.
#[cfg(test)]
mod tests {
use super::*;
use ext_slice::B;
/// Convenience wrapper for computing the suffix as a byte string.
///
/// Returns the forward suffix `&needle[s.pos..]` paired with its period.
fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) {
let s = Suffix::forward(needle, kind);
(&needle[s.pos..], s.period)
}
/// Convenience wrapper for computing the reverse suffix as a byte string.
///
/// Returns the reverse suffix `&needle[..s.pos]` paired with its period.
fn get_suffix_reverse(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) {
let s = Suffix::reverse(needle, kind);
(&needle[..s.pos], s.period)
}
/// Return all of the non-empty suffixes in the given byte string.
fn suffixes(bytes: &[u8]) -> Vec<&[u8]> {
(0..bytes.len()).map(|i| &bytes[i..]).collect()
}
/// Return the lexicographically maximal suffix of the given byte string.
///
/// This is the naive reference implementation: it sorts every suffix and
/// takes the last one. It panics (via `unwrap`) when `needle` is empty,
/// since there is then no suffix to return.
fn naive_maximal_suffix_forward(needle: &[u8]) -> &[u8] {
let mut sufs = suffixes(needle);
sufs.sort();
sufs.pop().unwrap()
}
/// Return the lexicographically maximal suffix of the reverse of the given
/// byte string.
///
/// The result is reversed back before being returned, so it reads in the
/// same direction as `needle`.
fn naive_maximal_suffix_reverse(needle: &[u8]) -> Vec<u8> {
let mut reversed = needle.to_vec();
reversed.reverse();
let mut got = naive_maximal_suffix_forward(&reversed).to_vec();
got.reverse();
got
}
#[test]
fn suffix_forward() {
// Asserts that the minimal forward suffix of `$given` is `$expected`
// and that its period is `$period`.
macro_rules! assert_suffix_min {
($given:expr, $expected:expr, $period:expr) => {
let (got_suffix, got_period) =
get_suffix_forward($given.as_bytes(), SuffixKind::Minimal);
assert_eq!((B($expected), $period), (got_suffix, got_period));
};
}
// Same as above, but for the maximal forward suffix.
macro_rules! assert_suffix_max {
($given:expr, $expected:expr, $period:expr) => {
let (got_suffix, got_period) =
get_suffix_forward($given.as_bytes(), SuffixKind::Maximal);
assert_eq!((B($expected), $period), (got_suffix, got_period));
};
}
assert_suffix_min!("a", "a", 1);
assert_suffix_max!("a", "a", 1);
assert_suffix_min!("ab", "ab", 2);
assert_suffix_max!("ab", "b", 1);
assert_suffix_min!("ba", "a", 1);
assert_suffix_max!("ba", "ba", 2);
assert_suffix_min!("abc", "abc", 3);
assert_suffix_max!("abc", "c", 1);
assert_suffix_min!("acb", "acb", 3);
assert_suffix_max!("acb", "cb", 2);
assert_suffix_min!("cba", "a", 1);
assert_suffix_max!("cba", "cba", 3);
assert_suffix_min!("abcabc", "abcabc", 3);
assert_suffix_max!("abcabc", "cabc", 3);
assert_suffix_min!("abcabcabc", "abcabcabc", 3);
assert_suffix_max!("abcabcabc", "cabcabc", 3);
assert_suffix_min!("abczz", "abczz", 5);
assert_suffix_max!("abczz", "zz", 1);
assert_suffix_min!("zzabc", "abc", 3);
assert_suffix_max!("zzabc", "zzabc", 5);
assert_suffix_min!("aaa", "aaa", 1);
assert_suffix_max!("aaa", "aaa", 1);
assert_suffix_min!("foobar", "ar", 2);
assert_suffix_max!("foobar", "r", 1);
}
#[test]
fn suffix_reverse() {
// Asserts that the minimal reverse suffix of `$given` is `$expected`
// and that its period is `$period`.
macro_rules! assert_suffix_min {
($given:expr, $expected:expr, $period:expr) => {
let (got_suffix, got_period) =
get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal);
assert_eq!((B($expected), $period), (got_suffix, got_period));
};
}
// Same as above, but for the maximal reverse suffix.
macro_rules! assert_suffix_max {
($given:expr, $expected:expr, $period:expr) => {
let (got_suffix, got_period) =
get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal);
assert_eq!((B($expected), $period), (got_suffix, got_period));
};
}
assert_suffix_min!("a", "a", 1);
assert_suffix_max!("a", "a", 1);
assert_suffix_min!("ab", "a", 1);
assert_suffix_max!("ab", "ab", 2);
assert_suffix_min!("ba", "ba", 2);
assert_suffix_max!("ba", "b", 1);
assert_suffix_min!("abc", "a", 1);
assert_suffix_max!("abc", "abc", 3);
assert_suffix_min!("acb", "a", 1);
assert_suffix_max!("acb", "ac", 2);
assert_suffix_min!("cba", "cba", 3);
assert_suffix_max!("cba", "c", 1);
assert_suffix_min!("abcabc", "abca", 3);
assert_suffix_max!("abcabc", "abcabc", 3);
assert_suffix_min!("abcabcabc", "abcabca", 3);
assert_suffix_max!("abcabcabc", "abcabcabc", 3);
assert_suffix_min!("abczz", "a", 1);
assert_suffix_max!("abczz", "abczz", 5);
assert_suffix_min!("zzabc", "zza", 3);
assert_suffix_max!("zzabc", "zz", 1);
assert_suffix_min!("aaa", "aaa", 1);
assert_suffix_max!("aaa", "aaa", 1);
}
// Property tests: for arbitrary non-empty input, the maximal suffix found
// by `Suffix` must equal the one found by the naive sort-based reference
// above. Empty input is skipped because `Suffix` requires a non-empty
// needle. Only the suffix bytes are compared; the period is ignored.
quickcheck! {
fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool {
if bytes.is_empty() {
return true;
}
let (got, _) = get_suffix_forward(&bytes, SuffixKind::Maximal);
let expected = naive_maximal_suffix_forward(&bytes);
got == expected
}
fn qc_suffix_reverse_maximal(bytes: Vec<u8>) -> bool {
if bytes.is_empty() {
return true;
}
let (got, _) = get_suffix_reverse(&bytes, SuffixKind::Maximal);
let expected = naive_maximal_suffix_reverse(&bytes);
expected == got
}
}
}

View File

@ -0,0 +1,32 @@
/// A sequence of tests for checking whether lossy decoding uses the maximal
/// subpart strategy correctly. Namely, if a sequence of otherwise invalid
/// UTF-8 bytes is a valid prefix of a valid UTF-8 sequence, then the entire
/// prefix is replaced by a single replacement codepoint. In all other cases,
/// each invalid byte is replaced by a single replacement codepoint. Bytes
/// that form a complete, valid UTF-8 sequence are always passed through
/// unchanged.
///
/// The first element in each tuple is the expected result of lossy decoding,
/// while the second element is the input given.
pub const LOSSY_TESTS: &[(&str, &[u8])] = &[
    ("a", b"a"),
    ("\u{FFFD}", b"\xFF"),
    ("\u{FFFD}\u{FFFD}", b"\xFF\xFF"),
    // A complete 2-, 3- or 4-byte scalar followed by an invalid byte: the
    // scalar is kept and only the trailing byte is replaced.
    ("β\u{FFFD}", b"\xCE\xB2\xFF"),
    ("☃\u{FFFD}", b"\xE2\x98\x83\xFF"),
    ("𝝱\u{FFFD}", b"\xF0\x9D\x9D\xB1\xFF"),
    // A truncated sequence followed by another lead or invalid byte: the
    // truncated prefix collapses to one replacement, the next byte to another.
    ("\u{FFFD}\u{FFFD}", b"\xCE\xF0"),
    ("\u{FFFD}\u{FFFD}", b"\xCE\xFF"),
    ("\u{FFFD}\u{FFFD}", b"\xE2\x98\xF0"),
    ("\u{FFFD}\u{FFFD}", b"\xE2\x98\xFF"),
    ("\u{FFFD}", b"\xF0\x9D\x9D"),
    ("\u{FFFD}\u{FFFD}", b"\xF0\x9D\x9D\xF0"),
    ("\u{FFFD}\u{FFFD}", b"\xF0\x9D\x9D\xFF"),
    // Truncated sequences at the end of the input.
    ("\u{FFFD}", b"\xCE"),
    ("a\u{FFFD}", b"a\xCE"),
    ("\u{FFFD}", b"\xE2\x98"),
    ("a\u{FFFD}", b"a\xE2\x98"),
    ("\u{FFFD}", b"\xF0\x9D\x9C"),
    ("a\u{FFFD}", b"a\xF0\x9D\x9C"),
    // An encoded surrogate is not a valid prefix of anything, so every byte
    // is replaced individually.
    ("a\u{FFFD}\u{FFFD}\u{FFFD}z", b"a\xED\xA0\x80z"),
    ("☃βツ\u{FFFD}", b"\xe2\x98\x83\xce\xb2\xe3\x83\x84\xFF"),
    ("a\u{FFFD}\u{FFFD}\u{FFFD}b", b"\x61\xF1\x80\x80\xE1\x80\xC2\x62"),
];

View File

@ -0,0 +1,630 @@
# GraphemeBreakTest-12.1.0.txt
# Date: 2019-03-10, 10:53:12 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Grapheme_Cluster_Break Test
#
# Format:
# <string> (# <comment>)?
# <string> contains hex Unicode code points, with
# ÷ wherever there is a break opportunity, and
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
# - (x) the Grapheme_Cluster_Break property value for the sample character
# - [x] the rule that determines whether there is a break or not,
# as listed in the Rules section of GraphemeBreakTest.html
#
# These samples may be extended or changed in the future.
#
÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0020 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0020 × 0308 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0020 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0020 × 0308 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000D ÷ 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 000D ÷ 0308 × 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000D ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000D ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000D ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000D ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000A ÷ 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 000A ÷ 0308 × 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000A ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000A ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000A ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000A ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0001 ÷ 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0001 ÷ 0308 × 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0001 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0001 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0001 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0001 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 034F ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 034F × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 034F ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 034F × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 034F ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 034F × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 034F ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 034F × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 034F × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 034F × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 034F ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 034F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 034F ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 034F × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 034F × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 034F × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 034F ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 034F × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 034F ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 034F × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 034F ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 034F × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 034F ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 034F × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 034F ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 034F × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 034F ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 034F × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 034F × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 034F × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 034F × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 034F × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 034F ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 034F × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F1E6 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1F1E6 × 0308 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1F1E6 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0600 × 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] SPACE (Other) ÷ [0.3]
÷ 0600 × 0308 ÷ 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0600 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0600 × 0308 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0600 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0600 × 0308 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0600 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0600 × 0308 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0600 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0600 × 0308 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0600 × 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0600 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0600 × 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0600 × 0308 ÷ 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0600 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0600 × 0308 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0600 × 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0600 × 0308 ÷ 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0600 × 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0600 × 0308 ÷ 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0600 × 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0600 × 0308 ÷ 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0600 × AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0600 × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0600 × AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0600 × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0600 × 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3]
÷ 0600 × 0308 ÷ 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0600 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0308 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3]
÷ 0600 × 0308 ÷ 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0903 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0903 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0903 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0903 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0903 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1100 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1100 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1100 × 0308 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1100 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1100 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1100 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1160 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1160 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1160 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1160 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1160 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1160 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 11A8 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 11A8 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 11A8 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 11A8 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 11A8 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC00 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC00 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ AC00 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC00 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC00 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC00 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC01 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC01 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ AC01 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC01 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC01 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC01 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 231A × 0308 ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 231A ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 231A × 0308 ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 231A ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 231A × 0308 ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 231A × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 231A × 0308 × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 231A ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 231A ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 231A × 0308 ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 231A × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 231A × 0308 × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 231A ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 231A × 0308 ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 231A ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 231A × 0308 ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 231A ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 231A × 0308 ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 231A ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 231A × 0308 ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 231A ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 231A × 0308 ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 231A ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 231A × 0308 ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0300 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0300 × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0300 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0300 × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 200D × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 200D × 0308 × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 200D ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 200D × 0308 ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 200D ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 200D × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 200D ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 200D × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 200D ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 200D × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 200D ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 200D ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0378 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0378 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0378 × 0308 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0378 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0378 × 0308 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0378 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0378 × 0308 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0378 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0378 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0378 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0378 × 0308 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]
÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]
÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3]
÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC00 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC01 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [0.3]
÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
÷ 0061 × 200D ÷ 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
#
# Lines: 602
#
# EOF

View File

@ -0,0 +1,45 @@
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
See Terms of Use for definitions of Unicode Inc.'s
Data Files and Software.
NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2019 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.

View File

@ -0,0 +1,530 @@
# SentenceBreakTest-12.1.0.txt
# Date: 2019-03-10, 10:53:28 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Sentence_Break Test
#
# Format:
# <string> (# <comment>)?
# <string> contains hex Unicode code points, with
# ÷ wherever there is a break opportunity, and
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
# - (x) the Sentence_Break property value for the sample character
# - [x] the rule that determines whether there is a break or not,
# as listed in the Rules section of SentenceBreakTest.html
#
# These samples may be extended or changed in the future.
#
÷ 0001 × 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0001 × 0308 × 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0001 × 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0001 × 0308 × 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0001 × 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0001 × 0308 × 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0001 × 0085 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0001 × 0308 × 0085 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0001 × 0009 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0001 × 0308 × 0009 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0001 × 0061 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0001 × 0308 × 0061 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0001 × 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0001 × 0308 × 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0001 × 01BB ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0001 × 0308 × 01BB ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0001 × 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0001 × 0308 × 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0001 × 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0001 × 0308 × 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0001 × 0021 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0001 × 0308 × 0021 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0001 × 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0001 × 0308 × 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0001 × 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0001 × 0308 × 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0001 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0001 × 0308 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0001 × 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 000D ÷ 0308 × 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000D ÷ 0308 × 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000D ÷ 0308 × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000D ÷ 0085 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 000D ÷ 0308 × 0085 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 000D ÷ 0009 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 000D ÷ 0308 × 0009 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 000D ÷ 0061 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 000D ÷ 0308 × 0061 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 000D ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 000D ÷ 0308 × 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 000D ÷ 01BB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 000D ÷ 0308 × 01BB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 000D ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 000D ÷ 0308 × 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 000D ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
÷ 000D ÷ 0308 × 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 000D ÷ 0021 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 000D ÷ 0308 × 0021 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 000D ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 000D ÷ 0308 × 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 000D ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
÷ 000D ÷ 0308 × 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 000D ÷ 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 000D ÷ 0308 × 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 000A ÷ 0308 × 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000A ÷ 0308 × 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000A ÷ 0308 × 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000A ÷ 0085 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 000A ÷ 0308 × 0085 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 000A ÷ 0009 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 000A ÷ 0308 × 0009 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 000A ÷ 0061 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 000A ÷ 0308 × 0061 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 000A ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 000A ÷ 0308 × 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 000A ÷ 01BB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 000A ÷ 0308 × 01BB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 000A ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 000A ÷ 0308 × 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 000A ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
÷ 000A ÷ 0308 × 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 000A ÷ 0021 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 000A ÷ 0308 × 0021 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 000A ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 000A ÷ 0308 × 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 000A ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
÷ 000A ÷ 0308 × 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 000A ÷ 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 000A ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0085 ÷ 0001 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0085 ÷ 0308 × 0001 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0085 ÷ 000D ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0085 ÷ 0308 × 000D ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0085 ÷ 000A ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0085 ÷ 0308 × 000A ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0085 ÷ 0085 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0085 ÷ 0308 × 0085 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0085 ÷ 0009 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0085 ÷ 0308 × 0009 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0085 ÷ 0061 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0085 ÷ 0308 × 0061 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0085 ÷ 0041 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0085 ÷ 0308 × 0041 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0085 ÷ 01BB ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0085 ÷ 0308 × 01BB ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0085 ÷ 0030 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0085 ÷ 0308 × 0030 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0085 ÷ 002E ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0085 ÷ 0308 × 002E ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0085 ÷ 0021 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0085 ÷ 0308 × 0021 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0085 ÷ 0022 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0085 ÷ 0308 × 0022 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0085 ÷ 002C ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
÷ 0085 ÷ 0308 × 002C ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0085 ÷ 00AD ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0085 ÷ 0308 × 00AD ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0085 ÷ 0300 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0085 ÷ 0308 × 0300 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0009 × 0001 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0009 × 0308 × 0001 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0009 × 000D ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0009 × 0308 × 000D ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0009 × 000A ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0009 × 0308 × 000A ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0009 × 0085 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0009 × 0308 × 0085 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0009 × 0009 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0009 × 0308 × 0009 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0009 × 0061 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0009 × 0308 × 0061 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0009 × 0041 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0009 × 0308 × 0041 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0009 × 01BB ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0009 × 0308 × 01BB ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0009 × 0030 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0009 × 0308 × 0030 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0009 × 002E ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0009 × 0308 × 002E ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0009 × 0021 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0009 × 0308 × 0021 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0009 × 0022 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0009 × 0308 × 0022 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0009 × 002C ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0009 × 0308 × 002C ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0009 × 00AD ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0009 × 0308 × 00AD ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0009 × 0300 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0009 × 0308 × 0300 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0061 × 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0061 × 0308 × 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0061 × 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0061 × 0308 × 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0061 × 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0061 × 0308 × 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0061 × 0085 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0061 × 0308 × 0085 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0061 × 0009 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0061 × 0308 × 0009 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0061 × 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0061 × 0308 × 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0061 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0061 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0061 × 01BB ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0061 × 0308 × 01BB ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0061 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0061 × 0308 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0061 × 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0061 × 0308 × 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0061 × 0021 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0061 × 0308 × 0021 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0061 × 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0061 × 0308 × 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0061 × 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0061 × 0308 × 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0061 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0061 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0041 × 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0041 × 0308 × 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0041 × 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0041 × 0308 × 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0041 × 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0041 × 0308 × 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0041 × 0085 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0041 × 0308 × 0085 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0041 × 0009 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0041 × 0308 × 0009 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0041 × 0061 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0041 × 0308 × 0061 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0041 × 0308 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0041 × 01BB ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0041 × 0308 × 01BB ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0041 × 0308 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0041 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0041 × 0308 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0041 × 0021 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0041 × 0308 × 0021 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0041 × 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0041 × 0308 × 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0041 × 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0041 × 0308 × 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0041 × 0308 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0041 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0041 × 0308 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 01BB × 0001 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 01BB × 0308 × 0001 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 01BB × 000D ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 01BB × 0308 × 000D ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 01BB × 000A ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 01BB × 0308 × 000A ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 01BB × 0085 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 01BB × 0308 × 0085 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 01BB × 0009 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 01BB × 0308 × 0009 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 01BB × 0061 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 01BB × 0308 × 0061 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 01BB × 0041 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 01BB × 0308 × 0041 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 01BB × 01BB ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 01BB × 0308 × 01BB ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 01BB × 0030 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 01BB × 0308 × 0030 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 01BB × 002E ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 01BB × 0308 × 002E ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 01BB × 0021 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 01BB × 0308 × 0021 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 01BB × 0022 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 01BB × 0308 × 0022 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 01BB × 002C ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 01BB × 0308 × 002C ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 01BB × 00AD ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 01BB × 0308 × 00AD ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 01BB × 0300 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 01BB × 0308 × 0300 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0030 × 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0030 × 0308 × 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0030 × 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0030 × 0308 × 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0030 × 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0030 × 0308 × 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0030 × 0085 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0030 × 0308 × 0085 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0030 × 0009 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0030 × 0308 × 0009 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0030 × 0061 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0030 × 0308 × 0061 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0030 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0030 × 0308 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0030 × 01BB ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0030 × 0308 × 01BB ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0030 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0030 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0030 × 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0030 × 0308 × 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0030 × 0021 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0030 × 0308 × 0021 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0030 × 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0030 × 0308 × 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0030 × 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0030 × 0308 × 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0030 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0030 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0030 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 002E ÷ 0001 ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 002E × 0308 ÷ 0001 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 002E × 000D ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 002E × 0308 × 000D ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 002E × 000A ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 002E × 0308 × 000A ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 002E × 0085 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 002E × 0308 × 0085 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 002E × 0009 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 002E × 0308 × 0009 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 002E × 0061 ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 002E × 0308 × 0061 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 002E ÷ 0041 ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 002E × 0308 ÷ 0041 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 002E ÷ 01BB ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 002E × 0308 ÷ 01BB ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 002E × 0030 ÷ # ÷ [0.2] FULL STOP (ATerm) × [6.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 002E × 0308 × 0030 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [6.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 002E × 002E ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] FULL STOP (ATerm) ÷ [0.3]
÷ 002E × 0308 × 002E ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] FULL STOP (ATerm) ÷ [0.3]
÷ 002E × 0021 ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 002E × 0308 × 0021 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 002E × 0022 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 002E × 0308 × 0022 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 002E × 002C ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] COMMA (SContinue) ÷ [0.3]
÷ 002E × 0308 × 002C ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] COMMA (SContinue) ÷ [0.3]
÷ 002E × 00AD ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 002E × 0308 × 00AD ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 002E × 0300 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 002E × 0308 × 0300 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0021 ÷ 0001 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0021 × 0308 ÷ 0001 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0021 × 000D ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0021 × 0308 × 000D ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0021 × 000A ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0021 × 0308 × 000A ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0021 × 0085 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0021 × 0308 × 0085 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0021 × 0009 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0021 × 0308 × 0009 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0021 ÷ 0061 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0021 × 0308 ÷ 0061 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0021 ÷ 0041 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0021 × 0308 ÷ 0041 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0021 ÷ 01BB ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0021 × 0308 ÷ 01BB ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0021 ÷ 0030 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0021 × 0308 ÷ 0030 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0021 × 002E ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] FULL STOP (ATerm) ÷ [0.3]
÷ 0021 × 0308 × 002E ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] FULL STOP (ATerm) ÷ [0.3]
÷ 0021 × 0021 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0021 × 0308 × 0021 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0021 × 0022 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0021 × 0308 × 0022 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0021 × 002C ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] COMMA (SContinue) ÷ [0.3]
÷ 0021 × 0308 × 002C ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] COMMA (SContinue) ÷ [0.3]
÷ 0021 × 00AD ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0021 × 0308 × 00AD ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0021 × 0300 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0021 × 0308 × 0300 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0022 × 0001 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0022 × 0308 × 0001 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0022 × 000D ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0022 × 0308 × 000D ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0022 × 000A ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0022 × 0308 × 000A ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0022 × 0085 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0022 × 0308 × 0085 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0022 × 0009 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0022 × 0308 × 0009 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0022 × 0061 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0022 × 0308 × 0061 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0022 × 0041 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0022 × 0308 × 0041 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0022 × 01BB ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0022 × 0308 × 01BB ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0022 × 0030 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0022 × 0308 × 0030 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0022 × 002E ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0022 × 0308 × 002E ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0022 × 0021 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0022 × 0308 × 0021 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0022 × 0022 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0022 × 0308 × 0022 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0022 × 002C ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0022 × 0308 × 002C ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0022 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0022 × 0308 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0022 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0022 × 0308 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 002C × 0001 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 002C × 0308 × 0001 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 002C × 000D ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 002C × 0308 × 000D ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 002C × 000A ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 002C × 0308 × 000A ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 002C × 0085 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 002C × 0308 × 0085 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 002C × 0009 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 002C × 0308 × 0009 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 002C × 0061 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 002C × 0308 × 0061 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 002C × 0041 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 002C × 0308 × 0041 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 002C × 01BB ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 002C × 0308 × 01BB ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 002C × 0030 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 002C × 0308 × 0030 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 002C × 002E ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 002C × 0308 × 002E ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 002C × 0021 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 002C × 0308 × 0021 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 002C × 0022 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 002C × 0308 × 0022 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 002C × 002C ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 002C × 0308 × 002C ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 002C × 0300 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 00AD × 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 00AD × 0308 × 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 00AD × 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 00AD × 0308 × 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 00AD × 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 00AD × 0308 × 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 00AD × 0085 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 00AD × 0308 × 0085 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 00AD × 0009 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 00AD × 0308 × 0009 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 00AD × 0061 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 00AD × 0308 × 0061 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 00AD × 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 00AD × 0308 × 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 00AD × 01BB ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 00AD × 0308 × 01BB ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 00AD × 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 00AD × 0308 × 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 00AD × 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 00AD × 0308 × 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 00AD × 0021 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 00AD × 0308 × 0021 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 00AD × 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 00AD × 0308 × 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 00AD × 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 00AD × 0308 × 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 00AD × 0308 × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 00AD × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 00AD × 0308 × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0300 × 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0300 × 0308 × 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 0300 × 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0300 × 0308 × 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0300 × 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0300 × 0308 × 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0300 × 0085 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0300 × 0308 × 0085 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
÷ 0300 × 0009 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0300 × 0308 × 0009 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
÷ 0300 × 0061 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0300 × 0308 × 0061 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
÷ 0300 × 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0300 × 0308 × 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
÷ 0300 × 01BB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0300 × 0308 × 01BB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
÷ 0300 × 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0300 × 0308 × 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0300 × 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0300 × 0308 × 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0300 × 0021 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0300 × 0308 × 0021 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
÷ 0300 × 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0300 × 0308 × 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
÷ 0300 × 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0300 × 0308 × 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 000D × 000A ÷ 0061 × 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Lower) × [998.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
÷ 0020 × 200D × 0646 ÷ # ÷ [0.2] SPACE (Sp) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [998.0] ARABIC LETTER NOON (OLetter) ÷ [0.3]
÷ 0646 × 200D × 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (OLetter) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [998.0] SPACE (Sp) ÷ [0.3]
÷ 0028 × 0022 × 0047 × 006F × 002E × 0022 × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [998.0] QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER G (Upper) × [998.0] LATIN SMALL LETTER O (Lower) × [998.0] FULL STOP (ATerm) × [9.0] QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3]
÷ 0028 × 201C × 0047 × 006F × 003F × 201D × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [998.0] LEFT DOUBLE QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER G (Upper) × [998.0] LATIN SMALL LETTER O (Lower) × [998.0] QUESTION MARK (STerm) × [9.0] RIGHT DOUBLE QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3]
÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 002E × 0020 × 0069 × 0073 ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER S (Lower) ÷ [0.3]
÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 003F × 0020 ÷ 0048 × 0065 ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUESTION MARK (STerm) × [9.0] SPACE (Sp) ÷ [11.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
÷ 0033 × 002E × 0034 ÷ # ÷ [0.2] DIGIT THREE (Numeric) × [998.0] FULL STOP (ATerm) × [6.0] DIGIT FOUR (Numeric) ÷ [0.3]
÷ 0063 × 002E × 0064 ÷ # ÷ [0.2] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER D (Lower) ÷ [0.3]
÷ 0043 × 002E × 0064 ÷ # ÷ [0.2] LATIN CAPITAL LETTER C (Upper) × [998.0] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER D (Lower) ÷ [0.3]
÷ 0063 × 002E × 0044 ÷ # ÷ [0.2] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER D (Upper) ÷ [0.3]
÷ 0043 × 002E × 0044 ÷ # ÷ [0.2] LATIN CAPITAL LETTER C (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER D (Upper) ÷ [0.3]
÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [8.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 2018 × 0028 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [8.0] LEFT SINGLE QUOTATION MARK (Close) × [998.0] LEFT PARENTHESIS (Close) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 ÷ 2018 × 0028 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [998.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
÷ 0065 × 0074 × 0063 × 002E × 0029 × 000A ÷ 0308 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
÷ 0074 × 0068 × 0065 × 0020 × 0072 × 0065 × 0073 × 0070 × 002E × 0020 × 006C × 0065 × 0061 × 0064 × 0065 × 0072 × 0073 × 0020 × 0061 × 0072 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER S (Lower) × [998.0] LATIN SMALL LETTER P (Lower) × [998.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER L (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER S (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
÷ 5B57 × 002E ÷ 5B57 ÷ # ÷ [0.2] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [998.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) ÷ [0.3]
÷ 0065 × 0074 × 0063 × 002E ÷ 5B83 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) ÷ [0.3]
÷ 0065 × 0074 × 0063 × 002E × 3002 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.1] IDEOGRAPHIC FULL STOP (STerm) ÷ [0.3]
÷ 5B57 × 3002 ÷ 5B83 ÷ # ÷ [0.2] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [998.0] IDEOGRAPHIC FULL STOP (STerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) ÷ [0.3]
÷ 0021 × 0020 × 0020 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] SPACE (Sp) × [10.0] SPACE (Sp) ÷ [0.3]
÷ 2060 × 0028 × 2060 × 0022 × 2060 × 0047 × 2060 × 006F × 2060 × 002E × 2060 × 0022 × 2060 × 0029 × 2060 × 0020 × 2060 ÷ 0028 × 2060 × 0048 × 2060 × 0065 × 2060 × 0020 × 2060 × 0064 × 2060 × 0069 × 2060 × 0064 × 2060 × 002E × 2060 × 0029 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER G (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER O (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0028 × 2060 × 201C × 2060 × 0047 × 2060 × 006F × 2060 × 003F × 2060 × 201D × 2060 × 0029 × 2060 × 0020 × 2060 ÷ 0028 × 2060 × 0048 × 2060 × 0065 × 2060 × 0020 × 2060 × 0064 × 2060 × 0069 × 2060 × 0064 × 2060 × 002E × 2060 × 0029 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT DOUBLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER G (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER O (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] QUESTION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT DOUBLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 002E × 2060 × 0020 × 2060 × 0069 × 2060 × 0073 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 003F × 2060 × 0020 × 2060 ÷ 0048 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUESTION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 002E × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0033 × 2060 × 002E × 2060 × 0034 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] DIGIT THREE (Numeric) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [6.0] DIGIT FOUR (Numeric) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0063 × 2060 × 002E × 2060 × 0064 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0043 × 2060 × 002E × 2060 × 0064 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER C (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0063 × 2060 × 002E × 2060 × 0044 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER D (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0043 × 2060 × 002E × 2060 × 0044 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER C (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER D (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 2018 × 2060 × 0028 × 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LEFT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 ÷ 2018 × 2060 × 0028 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 000A ÷ 2060 × 0308 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 0020 × 2060 × 0072 × 2060 × 0065 × 2060 × 0073 × 2060 × 0070 × 2060 × 002E × 2060 × 0020 × 2060 × 006C × 2060 × 0065 × 2060 × 0061 × 2060 × 0064 × 2060 × 0065 × 2060 × 0072 × 2060 × 0073 × 2060 × 0020 × 2060 × 0061 × 2060 × 0072 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER P (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER L (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 5B57 × 2060 × 002E × 2060 ÷ 5B57 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 3002 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.1] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 5B57 × 2060 × 3002 × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [998.0] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0021 × 2060 × 0020 × 2060 × 0020 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] EXCLAMATION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [10.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
#
# Lines: 502
#
# EOF

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,41 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate dfa --name GRAPHEME_BREAK_FWD --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
//
// ucd-generate 0.2.8 is available on crates.io.
// GRAPHEME_BREAK_FWD for big-endian targets: a lazily initialized sparse DFA
// with `u16` state IDs, backed by the serialized table embedded below.
#[cfg(target_endian = "big")]
lazy_static! {
pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("grapheme_break_fwd.bigendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
};
}
// GRAPHEME_BREAK_FWD for little-endian targets: a lazily initialized sparse
// DFA with `u16` state IDs, backed by the serialized table embedded below.
#[cfg(target_endian = "little")]
lazy_static! {
pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("grapheme_break_fwd.littleendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
};
}

View File

@ -0,0 +1,41 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate dfa --name GRAPHEME_BREAK_REV --reverse --longest --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
//
// ucd-generate 0.2.8 is available on crates.io.
// GRAPHEME_BREAK_REV for big-endian targets: a lazily initialized sparse DFA
// with `u16` state IDs (generated with `--reverse`), backed by the serialized
// table embedded below.
#[cfg(target_endian = "big")]
lazy_static! {
pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("grapheme_break_rev.bigendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
};
}
// GRAPHEME_BREAK_REV for little-endian targets: a lazily initialized sparse
// DFA with `u16` state IDs (generated with `--reverse`), backed by the
// serialized table embedded below.
#[cfg(target_endian = "little")]
lazy_static! {
pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("grapheme_break_rev.littleendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
};
}

View File

@ -0,0 +1,8 @@
pub mod grapheme_break_fwd;
pub mod grapheme_break_rev;
pub mod regional_indicator_rev;
pub mod sentence_break_fwd;
pub mod simple_word_fwd;
pub mod whitespace_anchored_fwd;
pub mod whitespace_anchored_rev;
pub mod word_break_fwd;

View File

@ -0,0 +1,41 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate dfa --name REGIONAL_INDICATOR_REV --reverse --classes --minimize --anchored --premultiply --state-size 1 src/unicode/fsm/ \p{gcb=Regional_Indicator}
//
// ucd-generate 0.2.8 is available on crates.io.
// REGIONAL_INDICATOR_REV for big-endian targets: a lazily initialized dense
// DFA with `u8` state IDs (generated with `--reverse`), backed by the
// serialized table embedded below.
#[cfg(target_endian = "big")]
lazy_static! {
pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
};
}
// REGIONAL_INDICATOR_REV for little-endian targets: a lazily initialized
// dense DFA with `u8` state IDs (generated with `--reverse`), backed by the
// serialized table embedded below.
#[cfg(target_endian = "little")]
lazy_static! {
pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
};
}

View File

@ -0,0 +1,41 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate dfa --name SENTENCE_BREAK_FWD --minimize --sparse --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
//
// ucd-generate 0.2.8 is available on crates.io.
// SENTENCE_BREAK_FWD for big-endian targets: a lazily initialized sparse DFA
// with `u32` state IDs, backed by the serialized table embedded below.
#[cfg(target_endian = "big")]
lazy_static! {
pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("sentence_break_fwd.bigendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
};
}
// SENTENCE_BREAK_FWD for little-endian targets: a lazily initialized sparse
// DFA with `u32` state IDs, backed by the serialized table embedded below.
#[cfg(target_endian = "little")]
lazy_static! {
pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("sentence_break_fwd.littleendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
};
}

View File

@ -0,0 +1,41 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate dfa --name SIMPLE_WORD_FWD --sparse --minimize --state-size 2 src/unicode/fsm/ \w
//
// ucd-generate 0.2.8 is available on crates.io.
// SIMPLE_WORD_FWD for big-endian targets: a lazily initialized sparse DFA with
// `u16` state IDs (generated from the pattern `\w`), backed by the serialized
// table embedded below.
#[cfg(target_endian = "big")]
lazy_static! {
pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("simple_word_fwd.bigendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
};
}
// SIMPLE_WORD_FWD for little-endian targets: a lazily initialized sparse DFA
// with `u16` state IDs (generated from the pattern `\w`), backed by the
// serialized table embedded below.
#[cfg(target_endian = "little")]
lazy_static! {
pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("simple_word_fwd.littleendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
};
}

View File

@ -0,0 +1,41 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate dfa --name WHITESPACE_ANCHORED_FWD --anchored --classes --premultiply --minimize --state-size 1 src/unicode/fsm/ \s+
//
// ucd-generate 0.2.8 is available on crates.io.
// WHITESPACE_ANCHORED_FWD for big-endian targets: a lazily initialized dense
// DFA with `u8` state IDs (generated from the anchored pattern `\s+`), backed
// by the serialized table embedded below.
#[cfg(target_endian = "big")]
lazy_static! {
pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
};
}
// WHITESPACE_ANCHORED_FWD for little-endian targets: a lazily initialized
// dense DFA with `u8` state IDs (generated from the anchored pattern `\s+`),
// backed by the serialized table embedded below.
#[cfg(target_endian = "little")]
lazy_static! {
pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
};
}

View File

@ -0,0 +1,41 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate dfa --name WHITESPACE_ANCHORED_REV --reverse --anchored --classes --minimize --state-size 1 src/unicode/fsm/ \s+
//
// ucd-generate 0.2.8 is available on crates.io.
// WHITESPACE_ANCHORED_REV for big-endian targets: a lazily initialized dense
// DFA with `u8` state IDs (generated with `--reverse` from the anchored
// pattern `\s+`), backed by the serialized table embedded below.
#[cfg(target_endian = "big")]
lazy_static! {
pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
};
}
// WHITESPACE_ANCHORED_REV for little-endian targets: a lazily initialized
// dense DFA with `u8` state IDs (generated with `--reverse` from the anchored
// pattern `\s+`), backed by the serialized table embedded below.
#[cfg(target_endian = "little")]
lazy_static! {
pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
};
}

View File

@ -0,0 +1,41 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate dfa --name WORD_BREAK_FWD --sparse --minimize --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
//
// ucd-generate 0.2.8 is available on crates.io.
// WORD_BREAK_FWD for big-endian targets: a lazily initialized sparse DFA with
// `u32` state IDs, backed by the serialized table embedded below.
#[cfg(target_endian = "big")]
lazy_static! {
pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("word_break_fwd.bigendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
};
}
// WORD_BREAK_FWD for little-endian targets: a lazily initialized sparse DFA
// with `u32` state IDs, backed by the serialized table embedded below.
#[cfg(target_endian = "little")]
lazy_static! {
pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
// `#[repr(C)]` wrapper with a zero-length `_align` array ahead of the payload.
// The array's element type (`u8` here, i.e. 1 byte) sets the minimum
// alignment of the struct and therefore of `bytes`.
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
bytes: B,
}
// The serialized DFA is embedded into the binary at compile time.
static ALIGNED: &'static Aligned<[u8]> = &Aligned {
_align: [],
bytes: *include_bytes!("word_break_fwd.littleendian.dfa"),
};
// NOTE(review): `from_bytes` is unsafe. Presumably this is sound because the
// embedded file was produced for this endianness and state ID size by the
// `ucd-generate` command recorded at the top of this file -- confirm.
unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
};
}

View File

@ -0,0 +1,355 @@
use regex_automata::DFA;
use ext_slice::ByteSlice;
use unicode::fsm::grapheme_break_fwd::GRAPHEME_BREAK_FWD;
use unicode::fsm::grapheme_break_rev::GRAPHEME_BREAK_REV;
use unicode::fsm::regional_indicator_rev::REGIONAL_INDICATOR_REV;
use utf8;
/// An iterator over grapheme clusters in a byte string.
///
/// This iterator is typically constructed by
/// [`ByteSlice::graphemes`](trait.ByteSlice.html#method.graphemes).
///
/// Unicode defines a grapheme cluster as an *approximation* to a single user
/// visible character. A grapheme cluster, or just "grapheme," is made up of
/// one or more codepoints. For end user oriented tasks, one should generally
/// prefer using graphemes instead of [`Chars`](struct.Chars.html), which
/// always yields one codepoint at a time.
///
/// Since graphemes are made up of one or more codepoints, this iterator yields
/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
/// are [substituted](index.html#handling-of-invalid-utf-8).
///
/// This iterator can be used in reverse. When reversed, exactly the same
/// set of grapheme clusters are yielded, but in reverse order.
///
/// This iterator only yields *extended* grapheme clusters, in accordance with
/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Grapheme_Cluster_Boundaries).
#[derive(Clone, Debug)]
pub struct Graphemes<'a> {
// The bytes that have not been yielded yet. `next` trims consumed bytes from
// the front of this slice and `next_back` trims them from the back.
bs: &'a [u8],
}
impl<'a> Graphemes<'a> {
    /// Build a grapheme iterator that walks over `bs`.
    pub(crate) fn new(bs: &'a [u8]) -> Graphemes<'a> {
        Graphemes { bs: bs }
    }

    /// Return the part of the original byte string that this iterator has
    /// not consumed yet.
    ///
    /// The returned slice borrows from the original data rather than from
    /// the iterator, so the iterator remains usable while the slice is
    /// alive.
    ///
    /// # Examples
    ///
    /// ```
    /// use bstr::ByteSlice;
    ///
    /// let mut it = b"abc".graphemes();
    ///
    /// assert_eq!(b"abc", it.as_bytes());
    /// it.next();
    /// assert_eq!(b"bc", it.as_bytes());
    /// it.next();
    /// it.next();
    /// assert_eq!(b"", it.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        let remaining: &'a [u8] = self.bs;
        remaining
    }
}
impl<'a> Iterator for Graphemes<'a> {
    type Item = &'a str;

    /// Decode one grapheme cluster from the front of the remaining bytes and
    /// advance past it.
    ///
    /// Returns `None` when the decoder reports that it consumed zero bytes,
    /// which is what it does for empty input.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        match decode_grapheme(self.bs) {
            // Nothing was consumed, so the iterator is exhausted.
            (_, 0) => None,
            (cluster, consumed) => {
                // Skip the consumed bytes. For invalid UTF-8 this count can
                // differ from `cluster.len()`.
                self.bs = &self.bs[consumed..];
                Some(cluster)
            }
        }
    }
}
impl<'a> DoubleEndedIterator for Graphemes<'a> {
    /// Decode one grapheme cluster from the back of the remaining bytes and
    /// shrink the remaining bytes accordingly.
    ///
    /// Returns `None` when the decoder reports that it consumed zero bytes,
    /// which is what it does for empty input.
    #[inline]
    fn next_back(&mut self) -> Option<&'a str> {
        match decode_last_grapheme(self.bs) {
            // Nothing was consumed, so the iterator is exhausted.
            (_, 0) => None,
            (cluster, consumed) => {
                // Drop the consumed bytes from the tail of the slice.
                let keep = self.bs.len() - consumed;
                self.bs = &self.bs[..keep];
                Some(cluster)
            }
        }
    }
}
/// An iterator over grapheme clusters in a byte string and their byte index
/// positions.
///
/// This iterator is typically constructed by
/// [`ByteSlice::grapheme_indices`](trait.ByteSlice.html#method.grapheme_indices).
///
/// Unicode defines a grapheme cluster as an *approximation* to a single user
/// visible character. A grapheme cluster, or just "grapheme," is made up of
/// one or more codepoints. For end user oriented tasks, one should generally
/// prefer using graphemes instead of [`Chars`](struct.Chars.html), which
/// always yields one codepoint at a time.
///
/// Since graphemes are made up of one or more codepoints, this iterator
/// yields `&str` elements (along with their start and end byte offsets).
/// When invalid UTF-8 is encountered, replacement codepoints are
/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
/// indices yielded by this iterator may not correspond to the length of the
/// grapheme cluster yielded with those indices. For example, when this
/// iterator encounters `\xFF` in the byte string, then it will yield a pair
/// of indices ranging over a single byte, but will provide an `&str`
/// equivalent to `"\u{FFFD}"`, which is three bytes in length. However, when
/// given only valid UTF-8, then all indices are in exact correspondence with
/// their paired grapheme cluster.
///
/// This iterator can be used in reverse. When reversed, exactly the same
/// set of grapheme clusters are yielded, but in reverse order.
///
/// This iterator only yields *extended* grapheme clusters, in accordance with
/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Grapheme_Cluster_Boundaries).
#[derive(Clone, Debug)]
pub struct GraphemeIndices<'a> {
// The bytes that have not been yielded yet. `next` trims consumed bytes from
// the front of this slice and `next_back` trims them from the back.
bs: &'a [u8],
// Start offset reported for the next item yielded by `next`. Begins at 0 and
// grows by the number of bytes each forward step consumes.
forward_index: usize,
// End offset reported for the next item yielded by `next_back`. Begins at the
// input length and shrinks by the number of bytes each backward step consumes.
reverse_index: usize,
}
impl<'a> GraphemeIndices<'a> {
    /// Build an index-reporting grapheme iterator over `bs`.
    ///
    /// The forward offset starts at the beginning of `bs` and the reverse
    /// offset starts at its end.
    pub(crate) fn new(bs: &'a [u8]) -> GraphemeIndices<'a> {
        let total = bs.len();
        GraphemeIndices { bs, forward_index: 0, reverse_index: total }
    }

    /// Return the part of the original byte string that this iterator has
    /// not consumed yet.
    ///
    /// The returned slice borrows from the original data rather than from
    /// the iterator, so the iterator remains usable while the slice is
    /// alive.
    ///
    /// # Examples
    ///
    /// ```
    /// use bstr::ByteSlice;
    ///
    /// let mut it = b"abc".grapheme_indices();
    ///
    /// assert_eq!(b"abc", it.as_bytes());
    /// it.next();
    /// assert_eq!(b"bc", it.as_bytes());
    /// it.next();
    /// it.next();
    /// assert_eq!(b"", it.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        let remaining: &'a [u8] = self.bs;
        remaining
    }
}
impl<'a> Iterator for GraphemeIndices<'a> {
    type Item = (usize, usize, &'a str);

    /// Decode one grapheme cluster from the front of the remaining bytes and
    /// yield it as `(start, end, grapheme)`, where `start..end` is the byte
    /// range it occupied in the original input.
    ///
    /// Returns `None` when the decoder reports that it consumed zero bytes,
    /// which is what it does for empty input.
    #[inline]
    fn next(&mut self) -> Option<(usize, usize, &'a str)> {
        match decode_grapheme(self.bs) {
            // Nothing was consumed, so the iterator is exhausted.
            (_, 0) => None,
            (cluster, consumed) => {
                let start = self.forward_index;
                self.bs = &self.bs[consumed..];
                self.forward_index += consumed;
                // The range width is the number of input bytes consumed,
                // which may differ from `cluster.len()` for invalid UTF-8.
                Some((start, start + consumed, cluster))
            }
        }
    }
}
impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
    /// Decode one grapheme cluster from the back of the remaining bytes and
    /// yield it as `(start, end, grapheme)`, where `start..end` is the byte
    /// range it occupied in the original input.
    ///
    /// Returns `None` when the decoder reports that it consumed zero bytes,
    /// which is what it does for empty input.
    #[inline]
    fn next_back(&mut self) -> Option<(usize, usize, &'a str)> {
        match decode_last_grapheme(self.bs) {
            // Nothing was consumed, so the iterator is exhausted.
            (_, 0) => None,
            (cluster, consumed) => {
                // Drop the consumed bytes from the tail of the slice.
                let keep = self.bs.len() - consumed;
                self.bs = &self.bs[..keep];
                // Move the reverse offset back to where this cluster starts.
                self.reverse_index -= consumed;
                let start = self.reverse_index;
                Some((start, start + consumed, cluster))
            }
        }
    }
}
/// Decode the first grapheme cluster of the given byte string.
///
/// The result is a pair of the decoded grapheme and the number of bytes of
/// `bs` that were consumed to produce it. Empty input yields `("", 0)`.
///
/// When the input does not start with a grapheme match, the grapheme
/// returned is the Unicode replacement codepoint and the byte count is
/// whatever the lossy UTF-8 decoder reports. In that case the byte count may
/// differ from the length of the returned grapheme.
pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) {
    const REPLACEMENT: &'static str = "\u{FFFD}";

    if bs.is_empty() {
        return ("", 0);
    }
    match GRAPHEME_BREAK_FWD.find(bs) {
        Some(end) => {
            // SAFETY: a match can only occur for valid UTF-8, so the matched
            // prefix is valid UTF-8.
            let cluster = unsafe { bs[..end].to_str_unchecked() };
            (cluster, cluster.len())
        }
        None => {
            // No match on non-empty bytes implies we found invalid UTF-8.
            let (_, consumed) = utf8::decode_lossy(bs);
            (REPLACEMENT, consumed)
        }
    }
}
/// Decode the last grapheme of the given byte string.
///
/// This is the reverse counterpart of `decode_grapheme`: it returns the
/// grapheme found at the end of `bs` and the number of trailing bytes it
/// occupies. Empty input gives `("", 0)`. If the reverse DFA finds no match,
/// the replacement codepoint is returned together with the byte count
/// reported by the lossy reverse UTF-8 decoder.
fn decode_last_grapheme(bs: &[u8]) -> (&str, usize) {
    if bs.is_empty() {
        return ("", 0);
    }
    match GRAPHEME_BREAK_REV.rfind(bs) {
        Some(guess) => {
            // The reverse DFA may swallow a pair of regional indicators that
            // must actually be split; correct the start offset if so.
            let start = adjust_rev_for_regional_indicator(bs, guess);
            // Safe because a match can only occur for valid UTF-8.
            let cluster = unsafe { bs[start..].to_str_unchecked() };
            (cluster, cluster.len())
        }
        None => {
            // No match on non-empty bytes implies we found invalid UTF-8.
            let (_, skipped) = utf8::decode_last_lossy(bs);
            ("\u{FFFD}", skipped)
        }
    }
}
/// Fix up the start offset of the grapheme decoded at the end of `bs`.
///
/// `i` is the initial guess for where the last grapheme starts, i.e.
/// `&bs[i..]` is the candidate grapheme. The guess is returned unchanged
/// unless the candidate is a pair of regional indicator codepoints that sits
/// at the end of a run containing an odd number of regional indicators. In
/// that case only the final indicator may be taken, so `i + 4` is returned.
///
/// The correction exists because a break between regional indicators is only
/// forbidden when an odd number of them precedes the break, counted from the
/// *start* of the string. A reverse regex cannot easily express that without
/// look-around, so the run is counted here instead.
fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize {
    // Every regional indicator is encoded in 4 bytes, and only a candidate
    // made of exactly two of them (8 bytes) can need adjusting.
    if bs.len() - i != 8 {
        return i;
    }
    // Walk backwards over the contiguous run of regional indicators at the
    // end of `bs`, counting them (the candidate pair included).
    //
    // FIXME: This is quadratic in the worst case, e.g., a string of just
    // regional indicator codepoints. A fix probably requires refactoring this
    // code a bit such that we don't rescan regional indicators.
    let mut seen = 0;
    loop {
        match REGIONAL_INDICATOR_REV.rfind(bs) {
            Some(start) => {
                bs = &bs[..start];
                seen += 1;
            }
            None => break,
        }
    }
    // An even run pairs up cleanly, so the pair we found stands. An odd run
    // leaves room for only one indicator in the final grapheme.
    if seen % 2 == 1 {
        i + 4
    } else {
        i
    }
}
#[cfg(test)]
mod tests {
    use ucd_parse::GraphemeClusterBreakTest;

    use super::*;
    use ext_slice::ByteSlice;
    use tests::LOSSY_TESTS;

    // Forward iteration must reproduce the clusters of every UCD test case.
    #[test]
    fn forward_ucd() {
        for (i, test) in ucdtests().into_iter().enumerate() {
            let given = test.grapheme_clusters.concat();
            let mut got: Vec<String> = Vec::new();
            for cluster in Graphemes::new(given.as_bytes()) {
                got.push(cluster.to_string());
            }
            assert_eq!(
                test.grapheme_clusters,
                got,
                "\ngrapheme forward break test {} failed:\n\
                 given: {:?}\n\
                 expected: {:?}\n\
                 got: {:?}\n",
                i,
                uniescape(&given),
                uniescape_vec(&test.grapheme_clusters),
                uniescape_vec(&got),
            );
        }
    }

    // Reverse iteration must produce the same clusters, back to front.
    #[test]
    fn reverse_ucd() {
        for (i, test) in ucdtests().into_iter().enumerate() {
            let given = test.grapheme_clusters.concat();
            let mut got: Vec<String> = Vec::new();
            for cluster in Graphemes::new(given.as_bytes()).rev() {
                got.push(cluster.to_string());
            }
            // Clusters were collected back to front; restore input order.
            got.reverse();
            assert_eq!(
                test.grapheme_clusters,
                got,
                "\n\ngrapheme reverse break test {} failed:\n\
                 given: {:?}\n\
                 expected: {:?}\n\
                 got: {:?}\n",
                i,
                uniescape(&given),
                uniescape_vec(&test.grapheme_clusters),
                uniescape_vec(&got),
            );
        }
    }

    // Concatenating the forward output must equal the lossy decoding.
    #[test]
    fn forward_lossy() {
        for &(expected, input) in LOSSY_TESTS {
            let mut got = String::new();
            for cluster in Graphemes::new(input.as_bytes()) {
                got.push_str(cluster);
            }
            assert_eq!(expected, got);
        }
    }

    // Concatenating the reverse output must equal the reversed lossy decoding.
    #[test]
    fn reverse_lossy() {
        for &(expected, input) in LOSSY_TESTS {
            let expected: String = expected.chars().rev().collect();
            let mut got = String::new();
            for cluster in Graphemes::new(input.as_bytes()).rev() {
                got.push_str(cluster);
            }
            assert_eq!(expected, got);
        }
    }

    /// Render every char of `s` as a `\u{..}` escape for failure messages.
    fn uniescape(s: &str) -> String {
        let mut escaped = String::new();
        for c in s.chars() {
            escaped.extend(c.escape_unicode());
        }
        escaped
    }

    /// Apply `uniescape` to each string in `strs`.
    fn uniescape_vec(strs: &[String]) -> Vec<String> {
        let mut escaped = Vec::with_capacity(strs.len());
        for s in strs {
            escaped.push(uniescape(s));
        }
        escaped
    }

    /// Return all of the UCD for grapheme breaks.
    ///
    /// Comment lines and lines mentioning surrogates are skipped.
    fn ucdtests() -> Vec<GraphemeClusterBreakTest> {
        const TESTDATA: &'static str =
            include_str!("data/GraphemeBreakTest.txt");

        TESTDATA
            .lines()
            .map(str::trim)
            .filter(|line| {
                !(line.starts_with("#") || line.contains("surrogate"))
            })
            .map(|line| line.parse::<GraphemeClusterBreakTest>().unwrap())
            .collect()
    }
}

View File

@ -0,0 +1,12 @@
pub use self::grapheme::{decode_grapheme, GraphemeIndices, Graphemes};
pub use self::sentence::{SentenceIndices, Sentences};
pub use self::whitespace::{whitespace_len_fwd, whitespace_len_rev};
pub use self::word::{
WordIndices, Words, WordsWithBreakIndices, WordsWithBreaks,
};
mod fsm;
mod grapheme;
mod sentence;
mod whitespace;
mod word;

View File

@ -0,0 +1,220 @@
use regex_automata::DFA;
use ext_slice::ByteSlice;
use unicode::fsm::sentence_break_fwd::SENTENCE_BREAK_FWD;
use utf8;
/// A forward iterator over the sentences of a byte string.
///
/// Values of this type are normally obtained from
/// [`ByteSlice::sentences`](trait.ByteSlice.html#method.sentences).
///
/// A yielded sentence typically carries its trailing punctuation and
/// whitespace with it.
///
/// Every item is a `&str`, since a sentence consists of one or more
/// codepoints. Wherever the input is not valid UTF-8, replacement codepoints
/// are [substituted](index.html#handling-of-invalid-utf-8).
///
/// Sentences are delimited according to the default sentence boundary rules
/// given in
/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Sentence_Boundaries).
#[derive(Clone, Debug)]
pub struct Sentences<'a> {
    // The portion of the input that has not been yielded yet.
    bs: &'a [u8],
}

impl<'a> Sentences<'a> {
    /// Build an iterator positioned at the start of `bs`.
    pub(crate) fn new(bs: &'a [u8]) -> Self {
        Self { bs }
    }

    /// Return the bytes that this iterator has not consumed yet.
    ///
    /// The returned slice borrows from the original data (not from the
    /// iterator), so the iterator remains usable while the slice is alive.
    ///
    /// # Examples
    ///
    /// ```
    /// use bstr::ByteSlice;
    ///
    /// let mut it = b"I want this. Not that. Right now.".sentences();
    ///
    /// assert_eq!(&b"I want this. Not that. Right now."[..], it.as_bytes());
    /// it.next();
    /// assert_eq!(b"Not that. Right now.", it.as_bytes());
    /// it.next();
    /// it.next();
    /// assert_eq!(b"", it.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        self.bs
    }
}
impl<'a> Iterator for Sentences<'a> {
    type Item = &'a str;

    /// Yield the next sentence and advance past the bytes it consumed.
    ///
    /// Returns `None` once `decode_sentence` reports that nothing was decoded.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        match decode_sentence(self.bs) {
            // Zero bytes decoded means there is nothing left to yield.
            (_, 0) => None,
            (sentence, consumed) => {
                self.bs = &self.bs[consumed..];
                Some(sentence)
            }
        }
    }
}
/// A forward iterator over the sentences of a byte string that also reports
/// where each sentence is located.
///
/// Values of this type are normally obtained from
/// [`ByteSlice::sentence_indices`](trait.ByteSlice.html#method.sentence_indices).
///
/// A yielded sentence typically carries its trailing punctuation and
/// whitespace with it.
///
/// Every item is a `&str` (a sentence consists of one or more codepoints)
/// accompanied by its start and end byte offsets. Wherever the input is not
/// valid UTF-8, replacement codepoints are
/// [substituted](index.html#handling-of-invalid-utf-8). As a consequence, the
/// offsets do not always span as many bytes as the yielded sentence is long.
/// For example, a lone `\xFF` in the input produces offsets covering a single
/// byte, while the accompanying `&str` is `"\u{FFFD}"`, which is three bytes
/// long. For input that is entirely valid UTF-8, the offsets correspond
/// exactly to the sentence they are paired with.
///
/// Sentences are delimited according to the default sentence boundary rules
/// given in
/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Sentence_Boundaries).
#[derive(Clone, Debug)]
pub struct SentenceIndices<'a> {
    // The portion of the input that has not been yielded yet.
    bs: &'a [u8],
    // Offset (relative to the original slice) at which `bs` currently starts.
    forward_index: usize,
}

impl<'a> SentenceIndices<'a> {
    /// Build an iterator positioned at the start of `bs`, i.e. at offset `0`.
    pub(crate) fn new(bs: &'a [u8]) -> Self {
        Self { bs, forward_index: 0 }
    }

    /// Return the bytes that this iterator has not consumed yet.
    ///
    /// The returned slice borrows from the original data (not from the
    /// iterator), so the iterator remains usable while the slice is alive.
    ///
    /// # Examples
    ///
    /// ```
    /// use bstr::ByteSlice;
    ///
    /// let mut it = b"I want this. Not that. Right now.".sentence_indices();
    ///
    /// assert_eq!(&b"I want this. Not that. Right now."[..], it.as_bytes());
    /// it.next();
    /// assert_eq!(b"Not that. Right now.", it.as_bytes());
    /// it.next();
    /// it.next();
    /// assert_eq!(b"", it.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        self.bs
    }
}
impl<'a> Iterator for SentenceIndices<'a> {
    type Item = (usize, usize, &'a str);

    /// Yield the next sentence as `(start, end, sentence)`, where
    /// `start..end` is the byte range consumed from the original slice.
    ///
    /// Returns `None` once `decode_sentence` reports that nothing was decoded.
    #[inline]
    fn next(&mut self) -> Option<(usize, usize, &'a str)> {
        match decode_sentence(self.bs) {
            // Zero bytes decoded means there is nothing left to yield.
            (_, 0) => None,
            (sentence, consumed) => {
                let start = self.forward_index;
                let end = start + consumed;
                self.bs = &self.bs[consumed..];
                self.forward_index = end;
                Some((start, end, sentence))
            }
        }
    }
}
/// Decode the first sentence of the given byte string.
///
/// Returns the sentence together with the number of bytes consumed from
/// `bs`. Empty input gives `("", 0)`. If the sentence DFA finds no match, the
/// replacement codepoint is returned together with the byte count reported
/// by the lossy UTF-8 decoder, which generally differs from the length of
/// the returned `&str`.
fn decode_sentence(bs: &[u8]) -> (&str, usize) {
    if bs.is_empty() {
        return ("", 0);
    }
    match SENTENCE_BREAK_FWD.find(bs) {
        Some(end) => {
            // Safe because a match can only occur for valid UTF-8.
            let sentence = unsafe { bs[..end].to_str_unchecked() };
            (sentence, sentence.len())
        }
        None => {
            // No match on non-empty bytes implies we found invalid UTF-8.
            let (_, skipped) = utf8::decode_lossy(bs);
            ("\u{FFFD}", skipped)
        }
    }
}
#[cfg(test)]
mod tests {
    use ucd_parse::SentenceBreakTest;

    use ext_slice::ByteSlice;

    // Forward iteration must reproduce the sentences of every UCD test case.
    #[test]
    fn forward_ucd() {
        for (i, test) in ucdtests().into_iter().enumerate() {
            let given = test.sentences.concat();
            let got = sentences(given.as_bytes());
            assert_eq!(
                test.sentences,
                got,
                "\n\nsentence forward break test {} failed:\n\
                 given: {:?}\n\
                 expected: {:?}\n\
                 got: {:?}\n",
                i,
                given,
                strs_to_bstrs(&test.sentences),
                strs_to_bstrs(&got),
            );
        }
    }

    // Some additional tests that don't seem to be covered by the UCD tests.
    #[test]
    fn forward_additional() {
        assert_eq!(vec!["a.. ", "A"], sentences(b"a.. A"));
        assert_eq!(vec!["a.. a"], sentences(b"a.. a"));

        assert_eq!(vec!["a... ", "A"], sentences(b"a... A"));
        assert_eq!(vec!["a... a"], sentences(b"a... a"));

        assert_eq!(vec!["a...,..., a"], sentences(b"a...,..., a"));
    }

    /// Split `bytes` into sentences and gather them into a vector.
    fn sentences(bytes: &[u8]) -> Vec<&str> {
        let mut found = Vec::new();
        found.extend(bytes.sentences());
        found
    }

    /// View each string as raw bytes, for use in failure messages.
    fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
        let mut raw = Vec::with_capacity(strs.len());
        for s in strs {
            raw.push(s.as_ref().as_bytes());
        }
        raw
    }

    /// Return all of the UCD for sentence breaks.
    ///
    /// Comment lines and lines mentioning surrogates are skipped.
    fn ucdtests() -> Vec<SentenceBreakTest> {
        const TESTDATA: &'static str =
            include_str!("data/SentenceBreakTest.txt");

        TESTDATA
            .lines()
            .map(str::trim)
            .filter(|line| {
                !(line.starts_with("#") || line.contains("surrogate"))
            })
            .map(|line| line.parse::<SentenceBreakTest>().unwrap())
            .collect()
    }
}

View File

@ -0,0 +1,14 @@
use regex_automata::DFA;
use unicode::fsm::whitespace_anchored_fwd::WHITESPACE_ANCHORED_FWD;
use unicode::fsm::whitespace_anchored_rev::WHITESPACE_ANCHORED_REV;
/// Return the offset at which the forward whitespace DFA stops matching
/// when run from the start of `slice`, or `0` if it does not match at all.
///
/// Judging by the function name, this is the byte length of the leading
/// whitespace, i.e. the position of the first non-whitespace character.
pub fn whitespace_len_fwd(slice: &[u8]) -> usize {
    match WHITESPACE_ANCHORED_FWD.find(slice) {
        Some(end) => end,
        None => 0,
    }
}
/// Return the offset at which the reverse whitespace DFA's match begins
/// when run from the end of `slice`, or `slice.len()` if it does not match
/// at all.
///
/// Judging by the function name, this is the position where the trailing
/// whitespace starts, i.e. the offset just past the last non-whitespace
/// character.
pub fn whitespace_len_rev(slice: &[u8]) -> usize {
    match WHITESPACE_ANCHORED_REV.rfind(slice) {
        Some(start) => start,
        None => slice.len(),
    }
}

View File

@ -0,0 +1,406 @@
use regex_automata::DFA;
use ext_slice::ByteSlice;
use unicode::fsm::simple_word_fwd::SIMPLE_WORD_FWD;
use unicode::fsm::word_break_fwd::WORD_BREAK_FWD;
use utf8;
/// A forward iterator over the words of a byte string.
///
/// Values of this type are normally obtained from
/// [`ByteSlice::words`](trait.ByteSlice.html#method.words).
///
/// This behaves like the [`WordsWithBreaks`](struct.WordsWithBreaks.html)
/// iterator, but yields only those elements that contain a "word" character.
/// UTS #18 (Annex C) defines a word character as the combination of the
/// `Alphabetic` and `Join_Control` properties, along with the
/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general categories.
///
/// Every item is a `&str`, since a word consists of one or more codepoints.
/// Wherever the input is not valid UTF-8, replacement codepoints are
/// [substituted](index.html#handling-of-invalid-utf-8).
///
/// Words are delimited according to the default word boundary rules given in
/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
/// In particular, this may not be suitable for Japanese and Chinese scripts
/// that do not use spaces between words.
#[derive(Clone, Debug)]
pub struct Words<'a>(WordsWithBreaks<'a>);

impl<'a> Words<'a> {
    /// Build an iterator positioned at the start of `bs`.
    pub(crate) fn new(bs: &'a [u8]) -> Words<'a> {
        let segments = WordsWithBreaks::new(bs);
        Words(segments)
    }

    /// Return the bytes that this iterator has not consumed yet.
    ///
    /// The returned slice borrows from the original data (not from the
    /// iterator), so the iterator remains usable while the slice is alive.
    ///
    /// # Examples
    ///
    /// ```
    /// use bstr::ByteSlice;
    ///
    /// let mut it = b"foo bar baz".words();
    ///
    /// assert_eq!(b"foo bar baz", it.as_bytes());
    /// it.next();
    /// it.next();
    /// assert_eq!(b" baz", it.as_bytes());
    /// it.next();
    /// assert_eq!(b"", it.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        let Words(ref segments) = *self;
        segments.as_bytes()
    }
}
impl<'a> Iterator for Words<'a> {
    type Item = &'a str;

    /// Yield the next segment that the simple-word DFA accepts, skipping over
    /// every segment in between that it rejects.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        self.0.find(|&segment| SIMPLE_WORD_FWD.is_match(segment.as_bytes()))
    }
}
/// A forward iterator over the words of a byte string that also reports
/// where each word is located.
///
/// Values of this type are normally obtained from
/// [`ByteSlice::word_indices`](trait.ByteSlice.html#method.word_indices).
///
/// This behaves like the
/// [`WordsWithBreakIndices`](struct.WordsWithBreakIndices.html) iterator,
/// but yields only those elements that contain a "word" character. UTS #18
/// (Annex C) defines a word character as the combination of the `Alphabetic`
/// and `Join_Control` properties, along with the `Decimal_Number`, `Mark` and
/// `Connector_Punctuation` general categories.
///
/// Every item is a `&str` (a word consists of one or more codepoints)
/// accompanied by its start and end byte offsets. Wherever the input is not
/// valid UTF-8, replacement codepoints are
/// [substituted](index.html#handling-of-invalid-utf-8). As a consequence, the
/// offsets do not always span as many bytes as the yielded word is long. For
/// example, a lone `\xFF` in the input produces offsets covering a single
/// byte, while the accompanying `&str` is `"\u{FFFD}"`, which is three bytes
/// long. For input that is entirely valid UTF-8, the offsets correspond
/// exactly to the word they are paired with.
///
/// Words are delimited according to the default word boundary rules given in
/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
/// In particular, this may not be suitable for Japanese and Chinese scripts
/// that do not use spaces between words.
#[derive(Clone, Debug)]
pub struct WordIndices<'a>(WordsWithBreakIndices<'a>);

impl<'a> WordIndices<'a> {
    /// Build an iterator positioned at the start of `bs`.
    pub(crate) fn new(bs: &'a [u8]) -> WordIndices<'a> {
        let segments = WordsWithBreakIndices::new(bs);
        WordIndices(segments)
    }

    /// Return the bytes that this iterator has not consumed yet.
    ///
    /// The returned slice borrows from the original data (not from the
    /// iterator), so the iterator remains usable while the slice is alive.
    ///
    /// # Examples
    ///
    /// ```
    /// use bstr::ByteSlice;
    ///
    /// let mut it = b"foo bar baz".word_indices();
    ///
    /// assert_eq!(b"foo bar baz", it.as_bytes());
    /// it.next();
    /// it.next();
    /// assert_eq!(b" baz", it.as_bytes());
    /// it.next();
    /// it.next();
    /// assert_eq!(b"", it.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        let WordIndices(ref segments) = *self;
        segments.as_bytes()
    }
}
impl<'a> Iterator for WordIndices<'a> {
    type Item = (usize, usize, &'a str);

    /// Yield the next `(start, end, segment)` triple whose segment the
    /// simple-word DFA accepts, skipping over every triple in between whose
    /// segment it rejects.
    #[inline]
    fn next(&mut self) -> Option<(usize, usize, &'a str)> {
        self.0.find(|&(_, _, segment)| {
            SIMPLE_WORD_FWD.is_match(segment.as_bytes())
        })
    }
}
/// A forward iterator over every word break in a byte string.
///
/// Values of this type are normally obtained from
/// [`ByteSlice::words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks).
///
/// Besides the words themselves, this iterator also yields whatever lies
/// between them. Concatenating all yielded elements therefore reproduces the
/// original string (subject to Unicode replacement codepoint substitutions).
///
/// Every item is a `&str`, since a word consists of one or more codepoints.
/// Wherever the input is not valid UTF-8, replacement codepoints are
/// [substituted](index.html#handling-of-invalid-utf-8).
///
/// Words are delimited according to the default word boundary rules given in
/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
/// In particular, this may not be suitable for Japanese and Chinese scripts
/// that do not use spaces between words.
#[derive(Clone, Debug)]
pub struct WordsWithBreaks<'a> {
    // The portion of the input that has not been yielded yet.
    bs: &'a [u8],
}

impl<'a> WordsWithBreaks<'a> {
    /// Build an iterator positioned at the start of `bs`.
    pub(crate) fn new(bs: &'a [u8]) -> Self {
        Self { bs }
    }

    /// Return the bytes that this iterator has not consumed yet.
    ///
    /// The returned slice borrows from the original data (not from the
    /// iterator), so the iterator remains usable while the slice is alive.
    ///
    /// # Examples
    ///
    /// ```
    /// use bstr::ByteSlice;
    ///
    /// let mut it = b"foo bar baz".words_with_breaks();
    ///
    /// assert_eq!(b"foo bar baz", it.as_bytes());
    /// it.next();
    /// assert_eq!(b" bar baz", it.as_bytes());
    /// it.next();
    /// it.next();
    /// assert_eq!(b" baz", it.as_bytes());
    /// it.next();
    /// it.next();
    /// assert_eq!(b"", it.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        self.bs
    }
}
impl<'a> Iterator for WordsWithBreaks<'a> {
    type Item = &'a str;

    /// Yield the next segment and advance past the bytes it consumed.
    ///
    /// Returns `None` once `decode_word` reports that nothing was decoded.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        match decode_word(self.bs) {
            // Zero bytes decoded means there is nothing left to yield.
            (_, 0) => None,
            (segment, consumed) => {
                self.bs = &self.bs[consumed..];
                Some(segment)
            }
        }
    }
}
/// A forward iterator over every word break in a byte string that also
/// reports where each element is located.
///
/// Values of this type are normally obtained from
/// [`ByteSlice::words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices).
///
/// Besides the words themselves, this iterator also yields whatever lies
/// between them. Concatenating all yielded elements therefore reproduces the
/// original string (subject to Unicode replacement codepoint substitutions).
///
/// Every item is a `&str` (a word consists of one or more codepoints)
/// accompanied by its start and end byte offsets. Wherever the input is not
/// valid UTF-8, replacement codepoints are
/// [substituted](index.html#handling-of-invalid-utf-8). As a consequence, the
/// offsets do not always span as many bytes as the yielded word is long. For
/// example, a lone `\xFF` in the input produces offsets covering a single
/// byte, while the accompanying `&str` is `"\u{FFFD}"`, which is three bytes
/// long. For input that is entirely valid UTF-8, the offsets correspond
/// exactly to the word they are paired with.
///
/// Words are delimited according to the default word boundary rules given in
/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
/// In particular, this may not be suitable for Japanese and Chinese scripts
/// that do not use spaces between words.
#[derive(Clone, Debug)]
pub struct WordsWithBreakIndices<'a> {
    // The portion of the input that has not been yielded yet.
    bs: &'a [u8],
    // Offset (relative to the original slice) at which `bs` currently starts.
    forward_index: usize,
}

impl<'a> WordsWithBreakIndices<'a> {
    /// Build an iterator positioned at the start of `bs`, i.e. at offset `0`.
    pub(crate) fn new(bs: &'a [u8]) -> Self {
        Self { bs, forward_index: 0 }
    }

    /// Return the bytes that this iterator has not consumed yet.
    ///
    /// The returned slice borrows from the original data (not from the
    /// iterator), so the iterator remains usable while the slice is alive.
    ///
    /// # Examples
    ///
    /// ```
    /// use bstr::ByteSlice;
    ///
    /// let mut it = b"foo bar baz".words_with_break_indices();
    ///
    /// assert_eq!(b"foo bar baz", it.as_bytes());
    /// it.next();
    /// assert_eq!(b" bar baz", it.as_bytes());
    /// it.next();
    /// it.next();
    /// assert_eq!(b" baz", it.as_bytes());
    /// it.next();
    /// it.next();
    /// assert_eq!(b"", it.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        self.bs
    }
}
impl<'a> Iterator for WordsWithBreakIndices<'a> {
    type Item = (usize, usize, &'a str);

    /// Yield the next segment as `(start, end, segment)`, where `start..end`
    /// is the byte range consumed from the original slice.
    ///
    /// Returns `None` once `decode_word` reports that nothing was decoded.
    #[inline]
    fn next(&mut self) -> Option<(usize, usize, &'a str)> {
        match decode_word(self.bs) {
            // Zero bytes decoded means there is nothing left to yield.
            (_, 0) => None,
            (segment, consumed) => {
                let start = self.forward_index;
                let end = start + consumed;
                self.bs = &self.bs[consumed..];
                self.forward_index = end;
                Some((start, end, segment))
            }
        }
    }
}
/// Decode the first word-break segment of the given byte string.
///
/// Returns the segment together with the number of bytes consumed from `bs`.
/// Empty input gives `("", 0)`. If the word-break DFA finds no match, the
/// replacement codepoint is returned together with the byte count reported
/// by the lossy UTF-8 decoder, which generally differs from the length of
/// the returned `&str`.
fn decode_word(bs: &[u8]) -> (&str, usize) {
    if bs.is_empty() {
        return ("", 0);
    }
    match WORD_BREAK_FWD.find(bs) {
        Some(end) => {
            // Safe because a match can only occur for valid UTF-8.
            let segment = unsafe { bs[..end].to_str_unchecked() };
            (segment, segment.len())
        }
        None => {
            // No match on non-empty bytes implies we found invalid UTF-8.
            let (_, skipped) = utf8::decode_lossy(bs);
            ("\u{FFFD}", skipped)
        }
    }
}
#[cfg(test)]
mod tests {
    use ucd_parse::WordBreakTest;

    use ext_slice::ByteSlice;

    // Forward iteration must reproduce the segments of every UCD test case.
    #[test]
    fn forward_ucd() {
        for (i, test) in ucdtests().into_iter().enumerate() {
            let given = test.words.concat();
            let got = words(given.as_bytes());
            assert_eq!(
                test.words,
                got,
                "\n\nword forward break test {} failed:\n\
                 given: {:?}\n\
                 expected: {:?}\n\
                 got: {:?}\n",
                i,
                given,
                strs_to_bstrs(&test.words),
                strs_to_bstrs(&got),
            );
        }
    }

    // Some additional tests that don't seem to be covered by the UCD tests.
    //
    // It's pretty amazing that the UCD tests miss these cases. I only found
    // them by running this crate's segmenter and ICU's segmenter on the same
    // text and comparing the output.
    #[test]
    fn forward_additional() {
        assert_eq!(vec!["a", ".", " ", "Y"], words(b"a. Y"));
        assert_eq!(vec!["r", ".", " ", "Yo"], words(b"r. Yo"));
        assert_eq!(
            vec!["whatsoever", ".", " ", "You", " ", "may"],
            words(b"whatsoever. You may")
        );
        assert_eq!(
            vec!["21stcentury'syesterday"],
            words(b"21stcentury'syesterday")
        );

        assert_eq!(vec!["Bonta_", "'", "s"], words(b"Bonta_'s"));
        assert_eq!(vec!["_vhat's"], words(b"_vhat's"));
        assert_eq!(vec!["__on'anima"], words(b"__on'anima"));
        assert_eq!(vec!["123_", "'", "4"], words(b"123_'4"));
        assert_eq!(vec!["_123'4"], words(b"_123'4"));
        assert_eq!(vec!["__12'345"], words(b"__12'345"));

        assert_eq!(
            vec!["tomorrowat4", ":", "00", ","],
            words(b"tomorrowat4:00,")
        );
        assert_eq!(vec!["RS1", "'", "s"], words(b"RS1's"));
        assert_eq!(vec!["X38"], words(b"X38"));

        assert_eq!(vec!["4abc", ":", "00", ","], words(b"4abc:00,"));
        assert_eq!(vec!["12S", "'", "1"], words(b"12S'1"));
        assert_eq!(vec!["1XY"], words(b"1XY"));

        assert_eq!(vec!["\u{FEFF}", "Ты"], words("\u{FEFF}Ты".as_bytes()));
    }

    /// Split `bytes` at every word break and gather the pieces into a vector.
    fn words(bytes: &[u8]) -> Vec<&str> {
        let mut found = Vec::new();
        found.extend(bytes.words_with_breaks());
        found
    }

    /// View each string as raw bytes, for use in failure messages.
    fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
        let mut raw = Vec::with_capacity(strs.len());
        for s in strs {
            raw.push(s.as_ref().as_bytes());
        }
        raw
    }

    /// Return all of the UCD for word breaks.
    ///
    /// Comment lines and lines mentioning surrogates are skipped.
    fn ucdtests() -> Vec<WordBreakTest> {
        const TESTDATA: &'static str = include_str!("data/WordBreakTest.txt");

        TESTDATA
            .lines()
            .map(str::trim)
            .filter(|line| {
                !(line.starts_with("#") || line.contains("surrogate"))
            })
            .map(|line| line.parse::<WordBreakTest>().unwrap())
            .collect()
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,135 @@
"""
@generated
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
# buildifier: disable=load
load(
"@rules_rust//rust:rust.bzl",
"rust_binary",
"rust_library",
"rust_test",
)
# buildifier: disable=load
load("@bazel_skylib//lib:selects.bzl", "selects")
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//third_party/cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # MIT from expression "MIT"
])
# Generated Targets
# buildifier: disable=load-on-top
load(
"@rules_rust//cargo:cargo_build_script.bzl",
"cargo_build_script",
)
# Build-script target for mlua 0.5.3 (crate root: build/main.rs), built with
# the `luajit` feature. It is private to this package and is consumed through
# the `deps` of the `mlua` library target in this file.
# NOTE: this file is generated by cargo-raze; manual comments are lost when it
# is regenerated.
cargo_build_script(
name = "mlua_build_script",
srcs = glob(["**/*.rs"]),
build_script_env = {
},
crate_features = [
"luajit",
],
crate_root = "build/main.rs",
# Every file of the vendored crate is made available to the build script.
data = glob(["**"]),
edition = "2018",
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"manual",
],
version = "0.5.3",
visibility = ["//visibility:private"],
# Build-time dependencies of the build script (the crate's
# [build-dependencies] that are not optional).
deps = [
"//third_party/cargo/vendor/cc-1.0.67:cc",
"//third_party/cargo/vendor/pkg-config-0.3.19:pkg_config",
],
)
# Unsupported target "benchmark" with type "bench" omitted
# Unsupported target "async_http_client" with type "example" omitted
# Unsupported target "async_http_reqwest" with type "example" omitted
# Unsupported target "async_http_server" with type "example" omitted
# Unsupported target "async_tcp_server" with type "example" omitted
# Unsupported target "guided_tour" with type "example" omitted
# Unsupported target "repl" with type "example" omitted
# Unsupported target "serialize" with type "example" omitted
# The mlua 0.5.3 library itself (crate root: src/lib.rs), built with the
# `luajit` feature. Benchmarks, examples and tests of the crate are omitted by
# cargo-raze (see the "Unsupported target" notes around this rule).
# NOTE: this file is generated by cargo-raze; manual comments are lost when it
# is regenerated.
rust_library(
name = "mlua",
srcs = glob(["**/*.rs"]),
crate_features = [
"luajit",
],
crate_root = "src/lib.rs",
crate_type = "lib",
data = [],
edition = "2018",
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"manual",
],
version = "0.5.3",
# buildifier: leave-alone
# The build script target comes first, followed by the crate's non-optional
# runtime dependencies.
deps = [
":mlua_build_script",
"//third_party/cargo/vendor/bstr-0.2.15:bstr",
"//third_party/cargo/vendor/lazy_static-1.4.0:lazy_static",
"//third_party/cargo/vendor/num-traits-0.2.14:num_traits",
],
)
# Unsupported target "async" with type "test" omitted
# Unsupported target "byte_string" with type "test" omitted
# Unsupported target "compile" with type "test" omitted
# Unsupported target "function" with type "test" omitted
# Unsupported target "hooks" with type "test" omitted
# Unsupported target "memory" with type "test" omitted
# Unsupported target "scope" with type "test" omitted
# Unsupported target "serde" with type "test" omitted
# Unsupported target "string" with type "test" omitted
# Unsupported target "table" with type "test" omitted
# Unsupported target "tests" with type "test" omitted
# Unsupported target "thread" with type "test" omitted
# Unsupported target "types" with type "test" omitted
# Unsupported target "userdata" with type "test" omitted
# Unsupported target "value" with type "test" omitted

View File

@ -0,0 +1,45 @@
## v0.5.3
- Fixed bug when returning nil-prefixed multi values from async function (+ test)
- Performance optimisation for async callbacks (polling)
## v0.5.2
- Some performance optimisations (callbacks)
- `ToLua` implementation for `Cow<str>` and `Cow<CStr>`
- Fixed bug with `Scope` destruction of partially polled futures
## v0.5.1
- Support cross compilation that should work well for vendored builds (including LuaJIT with some restrictions)
- Fix numeric types conversion for 32bit Lua
- Update tokio to 1.0 for async examples
## v0.5.0
- Serde support under `serialize` feature flag.
- Re-export `mlua_derive`.
- impl `ToLua` and `FromLua` for `HashSet` and `BTreeSet`
## v0.4.2
- Added `Function::dump()` to dump lua function to a binary chunk
- Added `ChunkMode` enum to mark chunks as text or binary
- Updated `set_memory_limit` doc
## v0.4.0
- Lua 5.4 support with `MetaMethod::Close`.
- `lua53` feature is disabled by default. The preferred Lua version now has to be chosen explicitly.
- Provide safety guarantees for Lua state, which means that potentially unsafe operations, like loading C modules (using `require` or `package.loadlib`), are disabled. The equivalent of the previous `Lua::new()` function is `Lua::unsafe_new()`.
- New `send` feature to require `Send`.
- New `module` feature, that disables linking to Lua Core Libraries. Required for modules.
- Don't allow `'callback` outlive `'lua` in `Lua::create_function()` to fix [the unsoundness](tests/compile/static_callback_args.rs).
- Added `Lua::into_static()` to make `'static` Lua state. This is useful to spawn async Lua threads that require `'static`.
- New function `Lua::set_memory_limit()` (similar to `rlua`) to enable memory restrictions in Lua VM (requires Lua >= 5.2).
- `Scope`, temporarily removed in v0.3, is back with async support.
- Removed deprecated `Table::call()` function.
- Added hooks support (backported from rlua 0.17).
- New `AnyUserData::has_metamethod()` function.
- LuaJIT 2.0.5 (the latest stable) support.
- Various bug fixes and improvements.

1685
third_party/cargo/vendor/mlua-0.5.3/Cargo.lock generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,143 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "mlua"
version = "0.5.3"
authors = ["Aleksandr Orlenko <zxteam@pm.me>", "kyren <catherine@chucklefish.org>"]
build = "build/main.rs"
links = "lua"
description = "High level bindings to Lua 5.4/5.3/5.2/5.1 (including LuaJIT)\nwith async/await features and support of writing native lua modules in Rust.\n"
documentation = "https://docs.rs/mlua"
readme = "README.md"
keywords = ["lua", "luajit", "async", "futures", "scripting"]
categories = ["api-bindings", "asynchronous"]
license = "MIT"
repository = "https://github.com/khvzak/mlua"
[package.metadata.docs.rs]
features = ["lua53", "async", "send", "serialize"]
rustdoc-args = ["--cfg", "docsrs"]
[[example]]
name = "async_http_client"
required-features = ["async"]
[[example]]
name = "async_http_reqwest"
required-features = ["async", "serialize"]
[[example]]
name = "async_http_server"
required-features = ["async", "send"]
[[example]]
name = "async_tcp_server"
required-features = ["async"]
[[example]]
name = "serialize"
required-features = ["serialize"]
[[bench]]
name = "benchmark"
harness = false
required-features = ["async"]
[dependencies.bstr]
version = "0.2"
features = ["std"]
default_features = false
[dependencies.erased-serde]
version = "0.3"
optional = true
[dependencies.futures-core]
version = "0.3.5"
optional = true
[dependencies.futures-task]
version = "0.3.5"
optional = true
[dependencies.futures-util]
version = "0.3.5"
optional = true
[dependencies.lazy_static]
version = "1.4"
[dependencies.mlua_derive]
version = "0.5"
optional = true
[dependencies.num-traits]
version = "0.2.14"
[dependencies.serde]
version = "1.0"
optional = true
[dev-dependencies.criterion]
version = "0.3.4"
features = ["html_reports", "async_tokio"]
[dev-dependencies.futures]
version = "0.3.5"
[dev-dependencies.futures-timer]
version = "3.0"
[dev-dependencies.hyper]
version = "0.14"
features = ["client", "server"]
[dev-dependencies.reqwest]
version = "0.11"
features = ["json"]
[dev-dependencies.rustyline]
version = "7.0"
[dev-dependencies.serde_json]
version = "1.0"
[dev-dependencies.tokio]
version = "1.0"
features = ["full"]
[dev-dependencies.trybuild]
version = "1.0"
[build-dependencies.cc]
version = "1.0"
[build-dependencies.lua-src]
version = ">= 540.0.0, < 550.0.0"
optional = true
[build-dependencies.luajit-src]
version = ">= 210.1.2, < 220.0.0"
optional = true
[build-dependencies.pkg-config]
version = "0.3.17"
[features]
async = ["futures-core", "futures-task", "futures-util"]
lua51 = []
lua52 = []
lua53 = []
lua54 = []
luajit = []
module = ["mlua_derive"]
send = []
serialize = ["serde", "erased-serde"]
vendored = ["lua-src", "luajit-src"]

View File

@ -0,0 +1,25 @@
Both mlua and rlua are distributed under the MIT license, which is reproduced
below:
MIT License
Copyright (c) 2019-2021 A. Orlenko
Copyright (c) 2017 rlua
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,239 @@
# mlua
[![Build Status]][github-actions] [![Latest Version]][crates.io] [![API Documentation]][docs.rs] [![Coverage Status]][codecov.io]
[Build Status]: https://github.com/khvzak/mlua/workflows/CI/badge.svg
[github-actions]: https://github.com/khvzak/mlua/actions
[Latest Version]: https://img.shields.io/crates/v/mlua.svg
[crates.io]: https://crates.io/crates/mlua
[API Documentation]: https://docs.rs/mlua/badge.svg
[docs.rs]: https://docs.rs/mlua
[Coverage Status]: https://codecov.io/gh/khvzak/mlua/branch/master/graph/badge.svg?token=99339FS1CG
[codecov.io]: https://codecov.io/gh/khvzak/mlua
[Guided Tour](examples/guided_tour.rs)
`mlua` provides bindings to the [Lua](https://www.lua.org) programming language for Rust, with the goal of providing a
_safe_ (as far as it's possible), high level, easy to use, practical and flexible API.
Started as [rlua](https://github.com/amethyst/rlua/tree/0.15.3) fork, `mlua` supports Lua 5.4, 5.3, 5.2 and 5.1 including LuaJIT (2.0.5 and 2.1 beta) and allows to write native Lua modules in Rust as well as use Lua in a standalone mode.
`mlua` is tested on Windows/macOS/Linux, including module mode, in [GitHub Actions] on the `x86_64` platform and with cross-compilation to `aarch64` (other targets are also supported).
[GitHub Actions]: https://github.com/khvzak/mlua/actions
## Usage
### Feature flags
`mlua` uses feature flags to reduce the number of dependencies and the amount of compiled code, and to let you choose only the required set of features.
Below is a list of the available feature flags. By default `mlua` does not enable any features.
* `lua54`: activate Lua [5.4] support
* `lua53`: activate Lua [5.3] support
* `lua52`: activate Lua [5.2] support
* `lua51`: activate Lua [5.1] support
* `luajit`: activate [LuaJIT] support
* `vendored`: build static Lua(JIT) library from sources during `mlua` compilation using [lua-src] or [luajit-src] crates
* `module`: enable module mode (building loadable `cdylib` library for Lua)
* `async`: enable async/await support (any executor can be used, eg. [tokio] or [async-std])
* `send`: make `mlua::Lua` transferable across thread boundaries (adds [`Send`] requirement to `mlua::Function` and `mlua::UserData`)
* `serialize`: add serialization and deserialization support to `mlua` types using [serde] framework
[5.4]: https://www.lua.org/manual/5.4/manual.html
[5.3]: https://www.lua.org/manual/5.3/manual.html
[5.2]: https://www.lua.org/manual/5.2/manual.html
[5.1]: https://www.lua.org/manual/5.1/manual.html
[LuaJIT]: https://luajit.org/
[lua-src]: https://github.com/khvzak/lua-src-rs
[luajit-src]: https://github.com/khvzak/luajit-src-rs
[tokio]: https://github.com/tokio-rs/tokio
[async-std]: https://github.com/async-rs/async-std
[`Send`]: https://doc.rust-lang.org/std/marker/trait.Send.html
[serde]: https://github.com/serde-rs/serde
### Async/await support
`mlua` supports async/await for all Lua versions. This works using Lua [coroutines](https://www.lua.org/manual/5.3/manual.html#2.6) and requires running a [Thread](https://docs.rs/mlua/latest/mlua/struct.Thread.html) along with enabling `feature = "async"` in `Cargo.toml`.
**Examples**:
- [HTTP Client](examples/async_http_client.rs)
- [HTTP Client (json)](examples/async_http_reqwest.rs)
- [HTTP Server](examples/async_http_server.rs)
- [TCP Server](examples/async_tcp_server.rs)
### Serialization (serde) support
With `serialize` feature flag enabled, `mlua` allows you to serialize/deserialize any type that implements [`serde::Serialize`] and [`serde::Deserialize`] into/from [`mlua::Value`]. In addition `mlua` provides [`serde::Serialize`] trait implementation for it (including `UserData` support).
[Example](examples/serialize.rs)
[`serde::Serialize`]: https://docs.serde.rs/serde/ser/trait.Serialize.html
[`serde::Deserialize`]: https://docs.serde.rs/serde/de/trait.Deserialize.html
[`mlua::Value`]: https://docs.rs/mlua/latest/mlua/enum.Value.html
### Compiling
You have to enable one of the features `lua54`, `lua53`, `lua52`, `lua51` or `luajit`, according to the chosen Lua version.
By default `mlua` uses `pkg-config` tool to find lua includes and libraries for the chosen Lua version.
In most cases it works as desired, although sometimes could be more preferable to use a custom lua library.
To achieve this, mlua supports `LUA_INC`, `LUA_LIB`, `LUA_LIB_NAME` and `LUA_LINK` environment variables.
`LUA_LINK` is optional and may be `dylib` (a dynamic library) or `static` (a static library, `.a` archive).
An example how to use them:
``` sh
my_project $ LUA_INC=$HOME/tmp/lua-5.2.4/src LUA_LIB=$HOME/tmp/lua-5.2.4/src LUA_LIB_NAME=lua LUA_LINK=static cargo build
```
`mlua` also supports vendored lua/luajit using the auxiliary crates [lua-src](https://crates.io/crates/lua-src) and
[luajit-src](https://crates.io/crates/luajit-src).
Just enable the `vendored` feature and cargo will automatically build and link specified lua/luajit version. This is the easiest way to get started with `mlua`.
### Standalone mode
In a standalone mode `mlua` allows to add to your application scripting support with a gently configured Lua runtime to ensure safety and soundness.
Add to `Cargo.toml` :
``` toml
[dependencies]
mlua = { version = "0.5", features = ["lua53", "vendored"] }
```
`main.rs`
``` rust
use mlua::prelude::*;
fn main() -> LuaResult<()> {
let lua = Lua::new();
let map_table = lua.create_table()?;
map_table.set(1, "one")?;
map_table.set("two", 2)?;
lua.globals().set("map_table", map_table)?;
lua.load("for k,v in pairs(map_table) do print(k,v) end").exec()?;
Ok(())
}
```
### Module mode
In a module mode `mlua` allows to create a compiled Lua module that can be loaded from Lua code using [`require`](https://www.lua.org/manual/5.3/manual.html#pdf-require). In this case `mlua` uses an external Lua runtime which could lead to potential unsafety due to unpredictability of the Lua environment and usage of libraries such as [`debug`](https://www.lua.org/manual/5.3/manual.html#6.10).
[Example](examples/module)
Add to `Cargo.toml` :
``` toml
[lib]
crate-type = ["cdylib"]
[dependencies]
mlua = { version = "0.5", features = ["lua53", "vendored", "module"] }
```
`lib.rs` :
``` rust
use mlua::prelude::*;
fn hello(_: &Lua, name: String) -> LuaResult<()> {
println!("hello, {}!", name);
Ok(())
}
#[mlua::lua_module]
fn my_module(lua: &Lua) -> LuaResult<LuaTable> {
let exports = lua.create_table()?;
exports.set("hello", lua.create_function(hello)?)?;
Ok(exports)
}
```
And then (**macOS** example):
``` sh
$ cargo rustc -- -C link-arg=-undefined -C link-arg=dynamic_lookup
$ ln -s ./target/debug/libmy_module.dylib ./my_module.so
$ lua5.3 -e 'require("my_module").hello("world")'
hello, world!
```
On macOS, you need to set additional linker arguments. One option is to compile with `cargo rustc --release -- -C link-arg=-undefined -C link-arg=dynamic_lookup`, the other is to create a `.cargo/config` with the following content:
``` toml
[target.x86_64-apple-darwin]
rustflags = [
"-C", "link-arg=-undefined",
"-C", "link-arg=dynamic_lookup",
]
[target.aarch64-apple-darwin]
rustflags = [
"-C", "link-arg=-undefined",
"-C", "link-arg=dynamic_lookup",
]
```
On Linux you can build modules normally with `cargo build --release`.
Vendored and non-vendored builds are supported for these OS.
On Windows `vendored` mode for modules is not supported since you need to link to a Lua dll.
Easiest way is to use either MinGW64 (as part of [MSYS2](https://github.com/msys2/msys2) package) with `pkg-config` or
MSVC with `LUA_INC` / `LUA_LIB` / `LUA_LIB_NAME` environment variables.
More details about compiling and linking Lua modules can be found on the [Building Modules](http://lua-users.org/wiki/BuildingModules) page.
## Safety
One of the `mlua` goals is to provide *safe* API between Rust and Lua.
Every place where the Lua C API may trigger an error longjmp in any way is protected by `lua_pcall`,
and the user of the library is protected from directly interacting with unsafe things like the Lua stack,
and there is overhead associated with this safety.
Unfortunately, `mlua` does not provide absolute safety even without using `unsafe` .
This library contains a huge amount of unsafe code. There are almost certainly bugs still lurking in this library!
It is surprisingly, fiendishly difficult to use the Lua C API without the potential for unsafety.
## Panic handling
`mlua` wraps panics that are generated inside Rust callbacks in a regular Lua error. Panics could be
resumed then by propagating the Lua error to Rust code.
For example:
``` rust
let lua = Lua::new();
let f = lua.create_function(|_, ()| -> LuaResult<()> {
panic!("test panic");
})?;
lua.globals().set("rust_func", f)?;
let _ = lua.load(r#"
local status, err = pcall(rust_func)
print(err) -- prints: test panic
error(err) -- propagate panic
"#).exec();
unreachable!()
```
`mlua` should also be panic safe in another way as well, which is that any `Lua` instances or handles
remains usable after a user generated panic, and such panics should not break internal invariants or
leak Lua stack space. This is mostly important to safely use `mlua` types in Drop impls, as you should not be
using panics for general error handling.
Below is a list of `mlua` behaviors that should be considered a bug.
If you encounter them, a bug report would be very welcome:
+ If your program panics with a message that contains the string "mlua internal error", this is a bug.
+ The above is true even for the internal panic about running out of stack space! There are a few ways to generate normal script errors by running out of stack, but if you encounter a *panic* based on running out of stack, this is a bug.
+ Lua C API errors are handled by longjmp. All instances where the Lua C API would otherwise longjmp over calling stack frames should be guarded against, except in internal callbacks where this is intentional. If you detect that `mlua` is triggering a longjmp over your Rust stack frames, this is a bug!
+ If you detect that, after catching a panic or during a Drop triggered from a panic, a `Lua` or handle method is triggering other bugs or there is a Lua stack space leak, this is a bug. `mlua` instances are supposed to remain fully usable in the face of user generated panics. This guarantee does not extend to panics marked with "mlua internal error" simply because that is already indicative of a separate bug.
## License
This project is licensed under the [MIT license](LICENSE)

View File

@ -0,0 +1,283 @@
// Only when built with the `luajit` feature for x86_64 macOS: enable the unstable
// `link_args` attribute and use it, via an otherwise empty `extern` block, to pass
// extra arguments to the linker.
// NOTE(review): presumably these flags give LuaJIT the low-address memory layout it
// needs on 64-bit macOS — confirm against the LuaJIT installation notes.
#![cfg_attr(
all(feature = "luajit", target_os = "macos", target_arch = "x86_64"),
feature(link_args)
)]
#[cfg_attr(
all(feature = "luajit", target_os = "macos", target_arch = "x86_64"),
link_args = "-pagezero_size 10000 -image_base 100000000",
allow(unused_attributes)
)]
extern "system" {}
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
use std::time::Duration;
use tokio::runtime::Runtime;
use tokio::task;
use mlua::prelude::*;
/// Runs two consecutive garbage-collection passes on `lua`, panicking if either fails.
///
/// Used as the per-batch setup step of the benchmarks below.
fn collect_gc_twice(lua: &Lua) {
    for _ in 0..2 {
        lua.gc_collect().unwrap();
    }
}
/// Benchmark: create a single empty Lua table.
fn create_table(c: &mut Criterion) {
    let lua = Lua::new();

    c.bench_function("create [table empty]", |bencher| {
        bencher.iter_batched(
            // Setup: collect garbage so every batch starts from the same heap state.
            || collect_gc_twice(&lua),
            |_setup| {
                lua.create_table().unwrap();
            },
            BatchSize::SmallInput,
        );
    });
}
/// Benchmark: create a table and fill integer keys 1 through 10 with their own value.
fn create_array(c: &mut Criterion) {
    let lua = Lua::new();

    c.bench_function("create [array] 10", |bencher| {
        bencher.iter_batched(
            || collect_gc_twice(&lua),
            |_setup| {
                let array = lua.create_table().unwrap();
                for idx in 1..=10 {
                    array.set(idx, idx).unwrap();
                }
            },
            BatchSize::SmallInput,
        );
    });
}
/// Benchmark: create a table with ten entries whose key and value are the same Lua string.
fn create_string_table(c: &mut Criterion) {
    let lua = Lua::new();

    c.bench_function("create [table string] 10", |bencher| {
        bencher.iter_batched(
            || collect_gc_twice(&lua),
            |_setup| {
                let table = lua.create_table().unwrap();
                let digits = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"];
                for text in digits.iter().copied() {
                    let lua_str = lua.create_string(text).unwrap();
                    // The same string handle is used as both key and value.
                    table.set(lua_str.clone(), lua_str).unwrap();
                }
            },
            BatchSize::SmallInput,
        );
    });
}
/// Benchmark: call a three-argument Lua `sum` function ten times from Rust.
fn call_lua_function(c: &mut Criterion) {
    let lua = Lua::new();
    let source = "function(a, b, c) return a + b + c end";

    c.bench_function("call Lua function [sum] 3 10", |bencher| {
        bencher.iter_batched_ref(
            // Setup: collect garbage, then compile a fresh function for this batch.
            || {
                collect_gc_twice(&lua);
                lua.load(source).eval::<LuaFunction>().unwrap()
            },
            |sum| {
                for n in 0..10 {
                    let _total: i64 = sum.call((n, n + 1, n + 2)).unwrap();
                }
            },
            BatchSize::SmallInput,
        );
    });
}
/// Benchmark: a Lua loop calling a Rust callback that sums three integers, ten times.
fn call_sum_callback(c: &mut Criterion) {
    let lua = Lua::new();

    // Expose the Rust closure to Lua as the global `callback`.
    let sum3 = lua
        .create_function(|_, (x, y, z): (i64, i64, i64)| Ok(x + y + z))
        .unwrap();
    lua.globals().set("callback", sum3).unwrap();

    let source = "function() for i = 1,10 do callback(i, i+1, i+2) end end";
    c.bench_function("call Rust callback [sum] 3 10", |bencher| {
        bencher.iter_batched_ref(
            || {
                collect_gc_twice(&lua);
                lua.load(source).eval::<LuaFunction>().unwrap()
            },
            |driver| {
                driver.call::<_, ()>(()).unwrap();
            },
            BatchSize::SmallInput,
        );
    });
}
/// Benchmark: a Lua loop calling an async Rust callback (which yields once to the
/// executor before summing three integers) ten times, driven on a Tokio runtime.
fn call_async_sum_callback(c: &mut Criterion) {
    let lua = Lua::new();

    let async_sum3 = lua
        .create_async_function(|_, (x, y, z): (i64, i64, i64)| async move {
            task::yield_now().await;
            Ok(x + y + z)
        })
        .unwrap();
    lua.globals().set("callback", async_sum3).unwrap();

    let source = "function() for i = 1,10 do callback(i, i+1, i+2) end end";
    c.bench_function("call async Rust callback [sum] 3 10", |bencher| {
        // A new runtime is built each time criterion invokes this closure.
        let runtime = Runtime::new().unwrap();
        bencher.to_async(runtime).iter_batched(
            || {
                collect_gc_twice(&lua);
                lua.load(source).eval::<LuaFunction>().unwrap()
            },
            |driver| async move {
                driver.call_async::<_, ()>(()).await.unwrap();
            },
            BatchSize::SmallInput,
        );
    });
}
/// Benchmark: a Lua loop calling a Rust callback that concatenates two Lua strings,
/// ten times.
fn call_concat_callback(c: &mut Criterion) {
    let lua = Lua::new();

    let concat = lua
        .create_function(|_, (left, right): (LuaString, LuaString)| {
            Ok(format!("{}{}", left.to_str()?, right.to_str()?))
        })
        .unwrap();
    lua.globals().set("callback", concat).unwrap();

    let source = "function() for i = 1,10 do callback('a', tostring(i)) end end";
    c.bench_function("call Rust callback [concat string] 10", |bencher| {
        bencher.iter_batched_ref(
            || {
                collect_gc_twice(&lua);
                lua.load(source).eval::<LuaFunction>().unwrap()
            },
            |driver| {
                driver.call::<_, ()>(()).unwrap();
            },
            BatchSize::SmallInput,
        );
    });
}
/// Benchmark: create ten registry values (each dropped immediately) and then expire
/// the dropped registry entries.
fn create_registry_values(c: &mut Criterion) {
    let lua = Lua::new();

    c.bench_function("create [registry value] 10", |bencher| {
        bencher.iter_batched(
            || collect_gc_twice(&lua),
            |_setup| {
                for _ in 0..10 {
                    let value = lua.pack(true).unwrap();
                    // The returned key is not kept, so it is dropped right away.
                    lua.create_registry_value(value).unwrap();
                }
                lua.expire_registry_values();
            },
            BatchSize::SmallInput,
        );
    });
}
/// Benchmark: create a table and store ten userdata values in it under keys 1 through 10.
fn create_userdata(c: &mut Criterion) {
    // Minimal userdata type with no methods.
    struct UserData(i64);
    impl LuaUserData for UserData {}

    let lua = Lua::new();

    c.bench_function("create [table userdata] 10", |bencher| {
        bencher.iter_batched(
            || collect_gc_twice(&lua),
            |_setup| {
                let holder: LuaTable = lua.create_table().unwrap();
                for key in 1..=10 {
                    holder.set(key, UserData(key)).unwrap();
                }
            },
            BatchSize::SmallInput,
        );
    });
}
/// Benchmark: a Lua loop calling a synchronous userdata method ten times.
fn call_userdata_method(c: &mut Criterion) {
    struct UserData(i64);

    impl LuaUserData for UserData {
        fn add_methods<'lua, M: LuaUserDataMethods<'lua, Self>>(methods: &mut M) {
            // `method` simply returns the wrapped integer.
            methods.add_method("method", |_, this, ()| Ok(this.0));
        }
    }

    let lua = Lua::new();
    lua.globals().set("userdata", UserData(10)).unwrap();

    let source = "function() for i = 1,10 do userdata:method() end end";
    c.bench_function("call [userdata method] 10", |bencher| {
        bencher.iter_batched_ref(
            || {
                collect_gc_twice(&lua);
                lua.load(source).eval::<LuaFunction>().unwrap()
            },
            |driver| {
                driver.call::<_, ()>(()).unwrap();
            },
            BatchSize::SmallInput,
        );
    });
}
/// Benchmark: a Lua loop calling an async userdata method ten times, driven on a
/// Tokio runtime.
fn call_async_userdata_method(c: &mut Criterion) {
    #[derive(Clone, Copy)]
    struct UserData(i64);

    impl LuaUserData for UserData {
        fn add_methods<'lua, M: LuaUserDataMethods<'lua, Self>>(methods: &mut M) {
            // `method` resolves immediately to the wrapped integer.
            methods.add_async_method("method", |_, this, ()| async move { Ok(this.0) });
        }
    }

    let lua = Lua::new();
    lua.globals().set("userdata", UserData(10)).unwrap();

    let source = "function() for i = 1,10 do userdata:method() end end";
    c.bench_function("call async [userdata method] 10", |bencher| {
        let runtime = Runtime::new().unwrap();
        bencher.to_async(runtime).iter_batched(
            || {
                collect_gc_twice(&lua);
                lua.load(source).eval::<LuaFunction>().unwrap()
            },
            |driver| async move {
                driver.call_async::<_, ()>(()).await.unwrap();
            },
            BatchSize::SmallInput,
        );
    });
}
// Registers every benchmark function above in a single criterion group named
// `benches`, configured with 300 samples, a 10 second measurement time and a
// 0.02 noise threshold, and generates the benchmark entry point from that group.
criterion_group! {
name = benches;
config = Criterion::default()
.sample_size(300)
.measurement_time(Duration::from_secs(10))
.noise_threshold(0.02);
targets =
create_table,
create_array,
create_string_table,
call_lua_function,
call_sum_callback,
call_async_sum_callback,
call_concat_callback,
create_registry_values,
create_userdata,
call_userdata_method,
call_async_userdata_method,
}
criterion_main!(benches);

View File

@ -0,0 +1,5 @@
use std::path::PathBuf;
/// Placeholder `probe_lua` that always panics via `unreachable!()`.
///
/// NOTE(review): presumably this file is the `find` module used when no Lua version
/// feature is enabled, so the build aborts before this is ever called — confirm
/// against the build script that selects it.
pub fn probe_lua() -> PathBuf {
unreachable!()
}

View File

@ -0,0 +1,138 @@
use std::env;
use std::fs::File;
use std::io::{BufRead, BufReader, Result};
use std::ops::Bound;
use std::path::{Path, PathBuf};
pub fn probe_lua() -> PathBuf {
let include_dir = env::var_os("LUA_INC").unwrap_or_default();
let lib_dir = env::var_os("LUA_LIB").unwrap_or_default();
let lua_lib = env::var_os("LUA_LIB_NAME").unwrap_or_default();
println!("cargo:rerun-if-env-changed=LUA_INC");
println!("cargo:rerun-if-env-changed=LUA_LIB");
println!("cargo:rerun-if-env-changed=LUA_LIB_NAME");
println!("cargo:rerun-if-env-changed=LUA_LINK");
let need_lua_lib = cfg!(any(not(feature = "module"), target_os = "windows"));
if include_dir != "" && (!need_lua_lib || lib_dir != "") {
if lua_lib == "" {
panic!("LUA_LIB_NAME is not set");
}
let _version = use_custom_lua(&include_dir, &lib_dir, &lua_lib).unwrap();
return PathBuf::from(include_dir);
}
// Find using `pkg-config`
#[cfg(feature = "lua54")]
{
let mut lua = pkg_config::Config::new()
.range_version((Bound::Included("5.4"), Bound::Excluded("5.5")))
.cargo_metadata(need_lua_lib)
.probe("lua");
if lua.is_err() {
lua = pkg_config::Config::new()
.cargo_metadata(need_lua_lib)
.probe("lua5.4");
}
lua.unwrap().include_paths[0].clone()
}
#[cfg(feature = "lua53")]
{
let mut lua = pkg_config::Config::new()
.range_version((Bound::Included("5.3"), Bound::Excluded("5.4")))
.cargo_metadata(need_lua_lib)
.probe("lua");
if lua.is_err() {
lua = pkg_config::Config::new()
.cargo_metadata(need_lua_lib)
.probe("lua5.3");
}
lua.unwrap().include_paths[0].clone()
}
#[cfg(feature = "lua52")]
{
let mut lua = pkg_config::Config::new()
.range_version((Bound::Included("5.2"), Bound::Excluded("5.3")))
.cargo_metadata(need_lua_lib)
.probe("lua");
if lua.is_err() {
lua = pkg_config::Config::new()
.cargo_metadata(need_lua_lib)
.probe("lua5.2");
}
lua.unwrap().include_paths[0].clone()
}
#[cfg(feature = "lua51")]
{
let mut lua = pkg_config::Config::new()
.range_version((Bound::Included("5.1"), Bound::Excluded("5.2")))
.cargo_metadata(need_lua_lib)
.probe("lua");
if lua.is_err() {
lua = pkg_config::Config::new()
.cargo_metadata(need_lua_lib)
.probe("lua5.1");
}
lua.unwrap().include_paths[0].clone()
}
#[cfg(feature = "luajit")]
{
let lua = pkg_config::Config::new()
.range_version((Bound::Included("2.0.5"), Bound::Unbounded))
.cargo_metadata(need_lua_lib)
.probe("luajit");
lua.unwrap().include_paths[0].clone()
}
}
/// Configures the build for a user-supplied Lua installation.
///
/// Scans `<include_dir>/lua.h` for lines made of exactly three whitespace-separated
/// tokens whose second token is `LUA_VERSION_NUM` and returns the third token of the
/// last such line (an empty string when no line matches). Unless building a module on
/// a non-Windows target, it also prints the cargo directives that add `lib_dir` to the
/// native search path and link `lua_lib` (statically when `LUA_LINK` is `static`).
///
/// # Errors
///
/// Returns an I/O error if `lua.h` cannot be opened or read.
fn use_custom_lua<S: AsRef<Path>>(include_dir: &S, lib_dir: &S, lua_lib: &S) -> Result<String> {
    // Find LUA_VERSION_NUM
    let header = BufReader::new(File::open(include_dir.as_ref().join("lua.h"))?);
    let mut version_found = String::new();
    for line in header.lines() {
        let line = line?;
        let mut tokens = line.split_whitespace();
        let (name, value) = (tokens.nth(1), tokens.next());
        // Require exactly three tokens: `<anything> LUA_VERSION_NUM <value>`.
        if let (Some("LUA_VERSION_NUM"), Some(value), None) = (name, value, tokens.next()) {
            version_found = value.to_string();
        }
    }

    let wants_static = env::var("LUA_LINK")
        .map(|kind| kind == "static")
        .unwrap_or(false);
    let link_kind = if wants_static { "static=" } else { "" };

    let emit_link_flags = cfg!(any(not(feature = "module"), target_os = "windows"));
    if emit_link_flags {
        println!(
            "cargo:rustc-link-search=native={}",
            lib_dir.as_ref().display()
        );
        println!(
            "cargo:rustc-link-lib={}{}",
            link_kind,
            lua_lib.as_ref().display()
        );
    }

    Ok(version_found)
}

View File

@ -0,0 +1,19 @@
use std::path::PathBuf;
pub fn probe_lua() -> PathBuf {
#[cfg(feature = "lua54")]
let artifacts = lua_src::Build::new().build(lua_src::Lua54);
#[cfg(feature = "lua53")]
let artifacts = lua_src::Build::new().build(lua_src::Lua53);
#[cfg(feature = "lua52")]
let artifacts = lua_src::Build::new().build(lua_src::Lua52);
#[cfg(feature = "lua51")]
let artifacts = lua_src::Build::new().build(lua_src::Lua51);
#[cfg(feature = "luajit")]
let artifacts = luajit_src::Build::new().build();
#[cfg(not(feature = "module"))]
artifacts.print_cargo_metadata();
artifacts.include_dir().to_owned()
}

View File

@ -0,0 +1,240 @@
#![allow(unreachable_code)]
use std::env;
use std::fs::File;
use std::io::{Error, ErrorKind, Result, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
// The `find` module is backed by one of three files, picked by the enabled features:
// - `find_vendored.rs` when `vendored` and a Lua version feature are enabled,
// - `find_normal.rs` when a Lua version feature is enabled without `vendored`,
// - `find_dummy.rs` when no Lua version feature is enabled at all.
#[cfg_attr(
all(
feature = "vendored",
any(
feature = "lua54",
feature = "lua53",
feature = "lua52",
feature = "lua51",
feature = "luajit"
)
),
path = "find_vendored.rs"
)]
#[cfg_attr(
all(
not(feature = "vendored"),
any(
feature = "lua54",
feature = "lua53",
feature = "lua52",
feature = "lua51",
feature = "luajit"
)
),
path = "find_normal.rs"
)]
#[cfg_attr(
not(any(
feature = "lua54",
feature = "lua53",
feature = "lua52",
feature = "lua51",
feature = "luajit"
)),
path = "find_dummy.rs"
)]
mod find;
/// Extension trait adding a "run to completion and fail on error" helper to
/// [`Command`].
trait CommandExt {
    /// Runs the command and waits for it to finish, returning an error if it could
    /// not be started or exited unsuccessfully.
    fn execute(&mut self) -> Result<()>;
}

impl CommandExt for Command {
    /// Execute the command and return an error if it exited with a failure status.
    ///
    /// # Errors
    ///
    /// Returns an [`ErrorKind::Other`] error naming the command together with the
    /// underlying cause: either the I/O error raised while spawning it or the
    /// unsuccessful exit status.
    fn execute(&mut self) -> Result<()> {
        let cause = match self.status() {
            Ok(status) if status.success() => return Ok(()),
            Ok(status) => status.to_string(),
            Err(err) => err.to_string(),
        };
        // Keep the cause in the message: "did not run successfully" on its own gives
        // no hint whether the compiler is missing or the compilation failed.
        Err(Error::new(
            ErrorKind::Other,
            format!(
                "The command {:?} did not run successfully: {}",
                self, cause
            ),
        ))
    }
}
fn build_glue<P: AsRef<Path> + std::fmt::Debug>(include_path: &P) {
let build_dir = PathBuf::from(env::var_os("OUT_DIR").unwrap());
let mut config = cc::Build::new();
config.include(include_path);
// Compile and run glue.c
let glue = build_dir.join("glue");
config
.get_compiler()
.to_command()
.arg("src/ffi/glue/glue.c")
.arg("-o")
.arg(&glue)
.execute()
.unwrap();
Command::new(glue)
.arg(build_dir.join("glue.rs"))
.execute()
.unwrap();
}
/// Writes `$OUT_DIR/glue.rs` with synthesized constants instead of compiled ones.
///
/// When cross-compiling, we cannot use `build_glue` as we cannot run the generated
/// executable. Instead, let's take a stab at synthesizing the likely values.
/// If you're cross-compiling and using a non-vendored library then there is a chance
/// that the values selected here may be incorrect, but we have no way to determine
/// that here.
///
/// # Errors
///
/// Returns an I/O error if `glue.rs` cannot be created or written.
///
/// # Panics
///
/// Panics if `OUT_DIR` or `CARGO_CFG_TARGET_POINTER_WIDTH` is missing or malformed,
/// or if no Lua version feature is enabled.
fn generate_glue() -> Result<()> {
    let build_dir = PathBuf::from(env::var_os("OUT_DIR").unwrap());
    let mut glue = File::create(build_dir.join("glue.rs"))?;
    writeln!(
        glue,
        "/* This file was generated by build/main.rs; do not modify by hand */"
    )?;
    writeln!(glue, "use std::os::raw::*;")?;

    writeln!(glue, "/* luaconf.h */")?;
    let pointer_bit_width: usize = env::var("CARGO_CFG_TARGET_POINTER_WIDTH")
        .unwrap()
        .parse()
        .unwrap();
    writeln!(
        glue,
        "pub const LUA_EXTRASPACE: c_int = {} / 8;",
        pointer_bit_width
    )?;

    // This is generally hardcoded to this size
    writeln!(glue, "pub const LUA_IDSIZE: c_int = 60;")?;

    // Unless the target is restricted, the defaults are 64 bit
    writeln!(glue, "pub type LUA_NUMBER = c_double;")?;
    writeln!(glue, "pub type LUA_INTEGER = i64;")?;
    writeln!(glue, "pub type LUA_UNSIGNED = u64;")?;

    writeln!(glue, "/* lua.h */")?;
    // LuaJIT implements the Lua 5.1 API, so both share the 5.1 header values.
    let is_lua51_api = cfg!(any(feature = "luajit", feature = "lua51"));
    let version = if is_lua51_api {
        (5, 1, 0)
    } else if cfg!(feature = "lua52") {
        (5, 2, 0)
    } else if cfg!(feature = "lua53") {
        (5, 3, 0)
    } else if cfg!(feature = "lua54") {
        (5, 4, 0)
    } else {
        unreachable!();
    };
    writeln!(
        glue,
        "pub const LUA_VERSION_NUM: c_int = {};",
        (version.0 * 100) + version.1
    )?;

    if is_lua51_api {
        // Lua 5.1 hard-codes its pseudo-indices: the registry sits at -10000, right
        // before LUA_ENVIRONINDEX and LUA_GLOBALSINDEX emitted below.
        writeln!(glue, "pub const LUA_REGISTRYINDEX: c_int = -10000;")?;
    } else {
        // Lua 5.2+ derives the registry index from the maximum stack size.
        let max_stack = if pointer_bit_width >= 32 {
            1_000_000
        } else {
            15_000
        };
        writeln!(
            glue,
            "pub const LUA_REGISTRYINDEX: c_int = -{} - 1000;",
            max_stack
        )?;
    }

    // These two are only defined in lua 5.1
    writeln!(glue, "pub const LUA_ENVIRONINDEX: c_int = -10001;")?;
    writeln!(glue, "pub const LUA_GLOBALSINDEX: c_int = -10002;")?;

    writeln!(glue, "/* lauxlib.h */")?;
    // This is only defined in lua 5.3 and up, but we can always generate its value here,
    // even if we don't use it.
    // This matches the default definition in lauxlib.h
    writeln!(glue, "pub const LUAL_NUMSIZES: c_int = std::mem::size_of::<LUA_INTEGER>() as c_int * 16 + std::mem::size_of::<LUA_NUMBER>() as c_int;")?;

    writeln!(glue, "/* lualib.h */")?;
    write!(
        glue,
        r#"
#[cfg(feature = "luajit")]
pub const LUA_BITLIBNAME: &str = "bit";
#[cfg(not(feature = "luajit"))]
pub const LUA_BITLIBNAME: &str = "bit32";
pub const LUA_COLIBNAME: &str = "coroutine";
pub const LUA_DBLIBNAME: &str = "debug";
pub const LUA_IOLIBNAME: &str = "io";
pub const LUA_LOADLIBNAME: &str = "package";
pub const LUA_MATHLIBNAME: &str = "math";
pub const LUA_OSLIBNAME: &str = "os";
pub const LUA_STRLIBNAME: &str = "string";
pub const LUA_TABLIBNAME: &str = "table";
pub const LUA_UTF8LIBNAME: &str = "utf8";
pub const LUA_JITLIBNAME: &str = "jit";
pub const LUA_FFILIBNAME: &str = "ffi";
"#
    )?;

    Ok(())
}
/// Build script entry point.
///
/// Rejects invalid feature combinations at compile time, locates the Lua headers
/// through the selected `find` module, and then produces `glue.rs`: synthesized when
/// cross-compiling (`TARGET` differs from `HOST`), otherwise by compiling and running
/// the C glue program.
fn main() {
    #[cfg(not(any(
        feature = "lua54",
        feature = "lua53",
        feature = "lua52",
        feature = "lua51",
        feature = "luajit"
    )))]
    compile_error!("You must enable one of the features: lua54, lua53, lua52, lua51, luajit");

    #[cfg(all(
        feature = "lua54",
        any(
            feature = "lua53",
            feature = "lua52",
            feature = "lua51",
            feature = "luajit"
        )
    ))]
    compile_error!("You can enable only one of the features: lua54, lua53, lua52, lua51, luajit");

    #[cfg(all(
        feature = "lua53",
        any(feature = "lua52", feature = "lua51", feature = "luajit")
    ))]
    compile_error!("You can enable only one of the features: lua54, lua53, lua52, lua51, luajit");

    #[cfg(all(feature = "lua52", any(feature = "lua51", feature = "luajit")))]
    compile_error!("You can enable only one of the features: lua54, lua53, lua52, lua51, luajit");

    #[cfg(all(feature = "lua51", feature = "luajit"))]
    compile_error!("You can enable only one of the features: lua54, lua53, lua52, lua51, luajit");

    // We don't support "vendored module" mode on windows
    // `compile_error!` needs a single string literal, so the two-line message is
    // written as one literal with a line continuation.
    #[cfg(all(feature = "vendored", feature = "module", target_os = "windows"))]
    compile_error!(
        "Vendored (static) builds are not supported for modules on Windows.\n\
         Please, use `pkg-config` or custom mode to link to a Lua dll."
    );

    let include_dir = find::probe_lua();
    // The glue program is built for the target, so it can only be run on the host
    // when the two are the same.
    if env::var("TARGET").unwrap() != env::var("HOST").unwrap() {
        generate_glue().unwrap();
    } else {
        build_glue(&include_dir);
    }
}

View File

@ -0,0 +1,83 @@
use std::collections::HashMap;
use std::sync::Arc;
use bstr::BString;
use hyper::body::{Body as HyperBody, HttpBody as _};
use hyper::Client as HyperClient;
use tokio::sync::Mutex;
use mlua::{Error, Lua, Result, UserData, UserDataMethods};
#[derive(Clone)]
struct BodyReader(Arc<Mutex<HyperBody>>);
impl BodyReader {
fn new(body: HyperBody) -> Self {
BodyReader(Arc::new(Mutex::new(body)))
}
}
impl UserData for BodyReader {
fn add_methods<'lua, M: UserDataMethods<'lua, Self>>(methods: &mut M) {
methods.add_async_method("read", |_, reader, ()| async move {
let mut reader = reader.0.lock().await;
if let Some(bytes) = reader.data().await {
let bytes = bytes.map_err(Error::external)?;
return Ok(Some(BString::from(bytes.as_ref())));
}
Ok(None)
});
}
}
#[tokio::main]
async fn main() -> Result<()> {
let lua = Lua::new();
let fetch_url = lua.create_async_function(|lua, uri: String| async move {
let client = HyperClient::new();
let uri = uri.parse().map_err(Error::external)?;
let resp = client.get(uri).await.map_err(Error::external)?;
let lua_resp = lua.create_table()?;
lua_resp.set("status", resp.status().as_u16())?;
let mut headers = HashMap::new();
for (key, value) in resp.headers().iter() {
headers
.entry(key.as_str())
.or_insert(Vec::new())
.push(value.to_str().unwrap());
}
lua_resp.set("headers", headers)?;
lua_resp.set("body", BodyReader::new(resp.into_body()))?;
Ok(lua_resp)
})?;
let globals = lua.globals();
globals.set("fetch_url", fetch_url)?;
let f = lua
.load(
r#"
local res = fetch_url(...)
print(res.status)
for key, vals in pairs(res.headers) do
for _, val in ipairs(vals) do
print(key..": "..val)
end
end
repeat
local body = res.body:read()
if body then
print(body)
end
until not body
"#,
)
.into_function()?;
f.call_async("http://httpbin.org/ip").await
}

View File

@ -0,0 +1,41 @@
use mlua::{Error, Lua, LuaSerdeExt, Result};
#[tokio::main]
async fn main() -> Result<()> {
let lua = Lua::new();
let globals = lua.globals();
globals.set("null", lua.null()?)?;
let fetch_json = lua.create_async_function(|lua, uri: String| async move {
let resp = reqwest::get(&uri)
.await
.and_then(|resp| resp.error_for_status())
.map_err(Error::external)?;
let json = resp
.json::<serde_json::Value>()
.await
.map_err(Error::external)?;
lua.to_value(&json)
})?;
globals.set("fetch_json", fetch_json)?;
let f = lua
.load(
r#"
function print_r(t, indent)
local indent = indent or ''
for k, v in pairs(t) do
io.write(indent, tostring(k))
if type(v) == "table" then io.write(':\n') print_r(v, indent..' ')
else io.write(': ', v == null and "null" or tostring(v), '\n') end
end
end
local res = fetch_json(...)
print_r(res)
"#,
)
.into_function()?;
f.call_async("https://httpbin.org/anything?arg0=val0").await
}

View File

@ -0,0 +1,101 @@
use std::net::SocketAddr;
use std::sync::Arc;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Request, Response, Server};
use mlua::{Error, Function, Lua, Result, Table, UserData, UserDataMethods};
/// Lua userdata pairing an incoming HTTP request with the address of the peer that
/// sent it.
#[derive(Clone)]
struct LuaRequest(Arc<(SocketAddr, Request<Body>)>);

impl UserData for LuaRequest {
    fn add_methods<'lua, M: UserDataMethods<'lua, Self>>(methods: &mut M) {
        // `req:remote_addr()` -> the peer address as a string.
        methods.add_method("remote_addr", |_lua, req, ()| {
            let (addr, _) = &*req.0;
            Ok(addr.to_string())
        });
        // `req:method()` -> the HTTP method as a string.
        methods.add_method("method", |_lua, req, ()| {
            let (_, request) = &*req.0;
            Ok(request.method().to_string())
        });
    }
}
/// Serves HTTP on 127.0.0.1:3000, answering every request by calling the Lua
/// `handler` function.
///
/// The handler receives a `LuaRequest` and must return a table; its optional
/// `status` (default 200), `headers` and `body` (default empty) fields are used to
/// build the response. Errors from the handler or from reading those fields are
/// returned from the per-request service future.
///
/// # Errors
///
/// Returns the server's error, wrapped as an external Lua error, if the server stops.
///
/// # Panics
///
/// Panics if the response cannot be built from the values supplied by the handler.
async fn run_server(handler: Function<'static>) -> Result<()> {
// One service is created per accepted connection.
let make_svc = make_service_fn(|socket: &AddrStream| {
let remote_addr = socket.remote_addr();
// Each connection gets its own handle to the Lua handler...
let handler = handler.clone();
async move {
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
// ...and so does each request on that connection.
let handler = handler.clone();
async move {
let lua_req = LuaRequest(Arc::new((remote_addr, req)));
let lua_resp: Table = handler.call_async(lua_req).await?;
let body = lua_resp
.get::<_, Option<String>>("body")?
.unwrap_or_default();
let mut resp = Response::builder()
.status(lua_resp.get::<_, Option<u16>>("status")?.unwrap_or(200));
if let Some(headers) = lua_resp.get::<_, Option<Table>>("headers")? {
for pair in headers.pairs::<String, String>() {
let (h, v) = pair?;
resp = resp.header(&h, v);
}
}
Ok::<_, Error>(resp.body(Body::from(body)).unwrap())
}
}))
}
});
let addr = ([127, 0, 0, 1], 3000).into();
// `LocalExec` spawns connection tasks on the local task set below.
let server = Server::bind(&addr).executor(LocalExec).serve(make_svc);
println!("Listening on http://{}", addr);
tokio::task::LocalSet::new()
.run_until(server)
.await
.map_err(Error::external)
}
/// Example entry point: loads a Lua request handler and serves HTTP with it until
/// the server stops.
#[tokio::main]
async fn main() -> Result<()> {
    // The handler is shared with spawned tasks, so the Lua state is turned into a
    // static reference for the lifetime of the server.
    let lua = Lua::new().into_static();

    let handler_source = r#"
function(req)
return {
status = 200,
headers = {
["X-Req-Method"] = req:method(),
["X-Remote-Addr"] = req:remote_addr(),
},
body = "Hello, World!"
}
end
"#;
    let handler: Function = lua.load(handler_source).eval()?;

    run_server(handler).await?;

    // Consume the static reference and drop it.
    // SAFETY: This is safe as long as we don't hold any other references to Lua
    // or alive resources.
    drop(unsafe { Lua::from_static(lua) });

    Ok(())
}
/// Hyper executor that spawns futures onto the current thread's local task set.
#[derive(Clone, Copy, Debug)]
struct LocalExec;

// The bound deliberately omits `Send`: futures are spawned with `spawn_local`.
impl<F: std::future::Future + 'static> hyper::rt::Executor<F> for LocalExec {
    /// Hands `future` to `tokio::task::spawn_local`; the join handle is discarded.
    fn execute(&self, future: F) {
        tokio::task::spawn_local(future);
    }
}

View File

@ -0,0 +1,128 @@
use std::sync::Arc;
use bstr::BString;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::{TcpListener, TcpStream};
use tokio::sync::Mutex;
use tokio::task;
use mlua::{Function, Lua, Result, UserData, UserDataMethods};
/// Data-less userdata type; its `UserData` impl provides the async `bind` function.
#[derive(Clone)]
struct LuaTcp;
/// Userdata wrapper around a tokio `TcpListener`, shared behind an async mutex.
#[derive(Clone)]
struct LuaTcpListener(Arc<Mutex<TcpListener>>);
/// Userdata wrapper around a tokio `TcpStream`, shared behind an async mutex.
#[derive(Clone)]
struct LuaTcpStream(Arc<Mutex<TcpStream>>);
impl UserData for LuaTcp {
    /// Registers `bind(addr)`, which asynchronously binds a listener to `addr`
    /// and returns it wrapped as a `LuaTcpListener`.
    fn add_methods<'lua, M: UserDataMethods<'lua, Self>>(methods: &mut M) {
        methods.add_async_function("bind", |_, addr: String| async move {
            let bound = TcpListener::bind(addr).await?;
            let shared = Arc::new(Mutex::new(bound));
            Ok(LuaTcpListener(shared))
        });
    }
}
impl UserData for LuaTcpListener {
    /// Registers `accept()`, which waits for the next connection and returns it
    /// as a `LuaTcpStream`. The peer address from `accept` is discarded.
    fn add_methods<'lua, M: UserDataMethods<'lua, Self>>(methods: &mut M) {
        methods.add_async_method("accept", |_, listener, ()| async move {
            // The mutex guard is a temporary that lives only for this statement.
            let accepted = listener.0.lock().await.accept().await;
            let (conn, _) = accepted?;
            let shared = Arc::new(Mutex::new(conn));
            Ok(LuaTcpStream(shared))
        });
    }
}
impl UserData for LuaTcpStream {
    /// Registers the async stream methods `peer_addr`, `read`, `write` and `close`.
    /// Each method locks the shared stream for the duration of its I/O call.
    fn add_methods<'lua, M: UserDataMethods<'lua, Self>>(methods: &mut M) {
        // `stream:peer_addr()` -> the remote address as a string.
        methods.add_async_method("peer_addr", |_, stream, ()| async move {
            Ok(stream.0.lock().await.peer_addr()?.to_string())
        });

        // `stream:read(size)` -> up to `size` bytes from a single read call.
        methods.add_async_method("read", |_, stream, size: usize| async move {
            let mut chunk = vec![0; size];
            let filled = stream.0.lock().await.read(&mut chunk).await?;
            // Keep only the bytes that were actually read.
            chunk.truncate(filled);
            Ok(BString::from(chunk))
        });

        // `stream:write(data)` -> number of bytes accepted by a single write call.
        methods.add_async_method("write", |_, stream, data: BString| async move {
            let written = stream.0.lock().await.write(&data).await?;
            Ok(written)
        });

        // `stream:close()` shuts the stream down.
        methods.add_async_method("close", |_, stream, ()| async move {
            let outcome = stream.0.lock().await.shutdown().await;
            outcome?;
            Ok(())
        });
    }
}
// Registers the `tcp` and `spawn` globals, compiles the embedded Lua accept loop
// and drives it on a `LocalSet`, passing "0.0.0.0:1234" as the bind address.
// Returns whatever result the Lua chunk finishes with.
async fn run_server(lua: &'static Lua) -> Result<()> {
// `spawn(f)` schedules the Lua function `f` as a local task. The join handle is
// dropped, so the task's result (including any error) is not observed here.
let spawn = lua.create_function(move |_, func: Function| {
task::spawn_local(async move { func.call_async::<_, ()>(()).await });
Ok(())
})?;
let globals = lua.globals();
globals.set("tcp", LuaTcp)?;
globals.set("spawn", spawn)?;
// The script accepts connections in a loop and spawns one echo task per client.
// A client sending "bye" closes its own connection; "exit" additionally clears
// `accept_new`, so the accept loop returns after the next incoming connection.
let server = lua
.load(
r#"
local addr = ...
local listener = tcp.bind(addr)
print("listening on "..addr)
local accept_new = true
while true do
local stream = listener:accept()
local peer_addr = stream:peer_addr()
print("connected from "..peer_addr)
if not accept_new then
return
end
spawn(function()
while true do
local data = stream:read(100)
data = data:match("^%s*(.-)%s*$") -- trim
print("["..peer_addr.."] "..data)
if data == "bye" then
stream:write("bye bye\n")
stream:close()
return
end
if data == "exit" then
stream:close()
accept_new = false
return
end
stream:write("echo: "..data.."\n")
end
end)
end
"#,
)
.into_function()?;
// `spawn_local` needs a `LocalSet`, so the main chunk runs inside one.
task::LocalSet::new()
.run_until(server.call_async::<_, ()>("0.0.0.0:1234"))
.await
}
#[tokio::main]
async fn main() {
let lua = Lua::new().into_static();
run_server(lua).await.unwrap();
// Consume the static reference and drop it.
// This is safe as long as we don't hold any other references to Lua
// or alive resources.
unsafe { Lua::from_static(lua) };
}

View File

@ -0,0 +1,201 @@
use std::f32;
use std::iter::FromIterator;
use mlua::{Function, Lua, MetaMethod, Result, UserData, UserDataMethods, Variadic};
// Guided tour of the API: globals, loading chunks, tables, calling functions,
// Rust callbacks, userdata with (meta)methods, and scoped non-'static callbacks.
fn main() -> Result<()> {
// You can create a new Lua state with `Lua::new()`. This loads the default Lua std library
// *without* the debug library.
let lua = Lua::new();
// You can get and set global variables. Notice that the globals table here is a permanent
// reference to _G, and it is mutated behind the scenes as Lua code is loaded. This API is
// based heavily around sharing and internal mutation (just like Lua itself).
let globals = lua.globals();
globals.set("string_var", "hello")?;
globals.set("int_var", 42)?;
assert_eq!(globals.get::<_, String>("string_var")?, "hello");
assert_eq!(globals.get::<_, i64>("int_var")?, 42);
// You can load and evaluate Lua code. The returned type of `Lua::load` is a builder
// that allows you to change settings before running Lua code. Here, we are using it to set
// the name of the loaded chunk to "example code", which will be used when Lua error
// messages are printed.
lua.load(
r#"
global = 'foo'..'bar'
"#,
)
.set_name("example code")?
.exec()?;
assert_eq!(globals.get::<_, String>("global")?, "foobar");
assert_eq!(lua.load("1 + 1").eval::<i32>()?, 2);
assert_eq!(lua.load("false == false").eval::<bool>()?, true);
assert_eq!(lua.load("return 1 + 2").eval::<i32>()?, 3);
// You can create and manage Lua tables
let array_table = lua.create_table()?;
array_table.set(1, "one")?;
array_table.set(2, "two")?;
array_table.set(3, "three")?;
assert_eq!(array_table.len()?, 3);
let map_table = lua.create_table()?;
map_table.set("one", 1)?;
map_table.set("two", 2)?;
map_table.set("three", 3)?;
let v: i64 = map_table.get("two")?;
assert_eq!(v, 2);
// You can pass values like `Table` back into Lua
globals.set("array_table", array_table)?;
globals.set("map_table", map_table)?;
lua.load(
r#"
for k, v in pairs(array_table) do
print(k, v)
end
for k, v in pairs(map_table) do
print(k, v)
end
"#,
)
.exec()?;
// You can load Lua functions
let print: Function = globals.get("print")?;
print.call::<_, ()>("hello from rust")?;
// This API generally handles variadics using tuples. This is one way to call a function with
// multiple parameters:
print.call::<_, ()>(("hello", "again", "from", "rust"))?;
// But, you can also pass variadic arguments with the `Variadic` type.
print.call::<_, ()>(Variadic::from_iter(
["hello", "yet", "again", "from", "rust"].iter().cloned(),
))?;
// You can bind rust functions to Lua as well. Callbacks receive the Lua state itself as their
// first parameter, and the arguments given to the function as the second parameter. The type
// of the arguments can be anything that is convertible from the parameters given by Lua, in
// this case, the function expects two string sequences.
let check_equal = lua.create_function(|_, (list1, list2): (Vec<String>, Vec<String>)| {
// This function just checks whether two string lists are equal, and in an inefficient way.
// Lua callbacks return `mlua::Result`, an Ok value is a normal return, and an Err return
// turns into a Lua 'error'. Again, any type that is convertible to Lua may be returned.
Ok(list1 == list2)
})?;
globals.set("check_equal", check_equal)?;
// You can also accept runtime variadic arguments to rust callbacks.
let join = lua.create_function(|_, strings: Variadic<String>| {
// (This is quadratic!, it's just an example!)
Ok(strings.iter().fold("".to_owned(), |a, b| a + b))
})?;
globals.set("join", join)?;
assert_eq!(
lua.load(r#"check_equal({"a", "b", "c"}, {"a", "b", "c"})"#)
.eval::<bool>()?,
true
);
assert_eq!(
lua.load(r#"check_equal({"a", "b", "c"}, {"d", "e", "f"})"#)
.eval::<bool>()?,
false
);
assert_eq!(lua.load(r#"join("a", "b", "c")"#).eval::<String>()?, "abc");
// Callbacks receive a Lua state as their first parameter so that they can use it to
// create new Lua values, if necessary.
let create_table = lua.create_function(|lua, ()| {
let t = lua.create_table()?;
t.set(1, 1)?;
t.set(2, 2)?;
Ok(t)
})?;
globals.set("create_table", create_table)?;
assert_eq!(lua.load(r#"create_table()[2]"#).eval::<i32>()?, 2);
// You can create userdata with methods and metamethods defined on them.
// Here's a worked example that shows many of the features of this API
// together
#[derive(Copy, Clone)]
struct Vec2(f32, f32);
impl UserData for Vec2 {
fn add_methods<'lua, M: UserDataMethods<'lua, Self>>(methods: &mut M) {
// `v:magnitude()` returns the Euclidean length of the vector.
methods.add_method("magnitude", |_, vec, ()| {
let mag_squared = vec.0 * vec.0 + vec.1 * vec.1;
Ok(mag_squared.sqrt())
});
// `a + b` adds two vectors component-wise.
methods.add_meta_function(MetaMethod::Add, |_, (vec1, vec2): (Vec2, Vec2)| {
Ok(Vec2(vec1.0 + vec2.0, vec1.1 + vec2.1))
});
}
}
let vec2_constructor = lua.create_function(|_, (x, y): (f32, f32)| Ok(Vec2(x, y)))?;
globals.set("vec2", vec2_constructor)?;
// (1, 2) + (2, 2) = (3, 4), whose magnitude is 5.
assert!(
(lua.load("(vec2(1, 2) + vec2(2, 2)):magnitude()")
.eval::<f32>()?
- 5.0)
.abs()
< f32::EPSILON
);
// Normally, Rust types passed to `Lua` must be `'static`, because there is no way to be
// sure of their lifetime inside the Lua state. There is, however, a limited way to lift this
// requirement. You can call `Lua::scope` to create userdata and callbacks types that only live
// for as long as the call to scope, but do not have to be `'static` (and `Send`).
{
let mut rust_val = 0;
lua.scope(|scope| {
// We create a 'sketchy' Lua callback that holds a mutable reference to the variable
// `rust_val`. Outside of a `Lua::scope` call, this would not be allowed
// because it could be unsafe.
lua.globals().set(
"sketchy",
scope.create_function_mut(|_, ()| {
rust_val = 42;
Ok(())
})?,
)?;
lua.load("sketchy()").exec()
})?;
assert_eq!(rust_val, 42);
}
// We were able to run our 'sketchy' function inside the scope just fine. However, if we
// try to run our 'sketchy' function outside of the scope, the function we created will have
// been invalidated and we will generate an error. If our function wasn't invalidated, we
// might be able to improperly access the freed `rust_val` which would be unsafe.
assert!(lua.load("sketchy()").exec().is_err());
Ok(())
}

View File

@ -0,0 +1,48 @@
//! This example shows a simple read-evaluate-print-loop (REPL).
use mlua::{Error, Lua, MultiValue};
use rustyline::Editor;
/// Runs the REPL: reads lines, evaluates them as Lua and prints the results.
///
/// Input that fails with an "incomplete" syntax error is kept and extended with
/// the next line (prompt `>> `); any other error is printed and the buffer reset.
/// The program exits when reading a line fails.
fn main() {
    let lua = Lua::new();
    let mut editor = Editor::<()>::new();

    loop {
        let mut prompt = "> ";
        let mut line = String::new();

        loop {
            let input = match editor.readline(prompt) {
                Ok(input) => input,
                Err(_) => return,
            };
            line.push_str(&input);

            match lua.load(&line).eval::<MultiValue>() {
                Ok(values) => {
                    editor.add_history_entry(line);
                    let rendered: Vec<String> =
                        values.iter().map(|value| format!("{:?}", value)).collect();
                    println!("{}", rendered.join("\t"));
                    break;
                }
                Err(Error::SyntaxError {
                    incomplete_input: true,
                    ..
                }) => {
                    // Keep what was typed so far and ask for a continuation line.
                    line.push('\n'); // separate input lines
                    prompt = ">> ";
                }
                Err(err) => {
                    eprintln!("error: {}", err);
                    break;
                }
            }
        }
    }
}

View File

@ -0,0 +1,66 @@
use mlua::{Error, Lua, LuaSerdeExt, Result, UserData, Value};
use serde::{Deserialize, Serialize};
// Transmission kind of a car; (de)serializable through serde.
#[derive(Serialize, Deserialize)]
enum Transmission {
Manual,
Automatic,
}
// Engine parameters; (de)serializable through serde.
// NOTE(review): units of `v` and `kw` are not stated here — presumably
// displacement and power in kW; confirm.
#[derive(Serialize, Deserialize)]
struct Engine {
v: u32,
kw: u32,
}
// Top-level record used by the example: built from a Lua table and later
// exposed to Lua as serializable userdata.
#[derive(Serialize, Deserialize)]
struct Car {
active: bool,
model: String,
transmission: Transmission,
engine: Engine,
}
// Empty impl: no userdata methods are overridden for `Car`.
impl UserData for Car {}
/// Demonstrates converting between Lua values, Rust structs and JSON via serde.
fn main() -> Result<()> {
    let lua = Lua::new();
    let globals = lua.globals();

    // Create Car struct from a Lua table
    let car_table: Value = lua.load(r#"
{active = true, model = "Volkswagen Golf", transmission = "Automatic", engine = {v = 1499, kw = 90}}
"#).eval()?;
    let car: Car = lua.from_value(car_table)?;

    // Set it as (serializable) userdata, next to the helper values the script needs
    let null = lua.null()?;
    globals.set("null", null)?;
    let array_mt = lua.array_metatable()?;
    globals.set("array_mt", array_mt)?;
    let car_userdata = lua.create_ser_userdata(car)?;
    globals.set("car", car_userdata)?;

    // Create a Lua table with multiple data types
    let val = lua
        .load(r#"{driver = "Boris", car = car, price = null, points = setmetatable({}, array_mt)}"#)
        .eval::<Value>()?;

    // Serialize the table above to JSON
    let json_str = serde_json::to_string(&val).map_err(Error::external)?;
    println!("{}", json_str);

    // Create Lua Value from JSON (or any serializable type)
    let json = serde_json::json!({
        "key": "value",
        "null": null,
        "array": [],
    });
    let json_value = lua.to_value(&json)?;
    globals.set("json_value", json_value)?;

    // Check from the Lua side that the converted value has the expected shape
    lua.load(
        r#"
assert(json_value["key"] == "value")
assert(json_value["null"] == null)
assert(#(json_value["array"]) == 0)
"#,
    )
    .exec()?;
    Ok(())
}

View File

@ -0,0 +1,569 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::ffi::{CStr, CString};
use std::hash::{BuildHasher, Hash};
use std::string::String as StdString;
use bstr::{BStr, BString};
use num_traits::cast;
use crate::error::{Error, Result};
use crate::function::Function;
use crate::lua::Lua;
use crate::string::String;
use crate::table::Table;
use crate::thread::Thread;
use crate::types::{LightUserData, MaybeSend};
use crate::userdata::{AnyUserData, UserData};
use crate::value::{FromLua, Nil, ToLua, Value};
impl<'lua> ToLua<'lua> for Value<'lua> {
    /// A `Value` is already a Lua value, so it is returned unchanged.
    fn to_lua(self, _lua: &'lua Lua) -> Result<Value<'lua>> {
        let unchanged = self;
        Ok(unchanged)
    }
}

impl<'lua> FromLua<'lua> for Value<'lua> {
    /// Accepts any Lua value as-is; this conversion never fails.
    fn from_lua(lua_value: Value<'lua>, _lua: &'lua Lua) -> Result<Self> {
        let unchanged = lua_value;
        Ok(unchanged)
    }
}
impl<'lua> ToLua<'lua> for String<'lua> {
fn to_lua(self, _: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::String(self))
}
}
impl<'lua> FromLua<'lua> for String<'lua> {
fn from_lua(value: Value<'lua>, lua: &'lua Lua) -> Result<String<'lua>> {
let ty = value.type_name();
lua.coerce_string(value)?
.ok_or_else(|| Error::FromLuaConversionError {
from: ty,
to: "String",
message: Some("expected string or number".to_string()),
})
}
}
impl<'lua> ToLua<'lua> for Table<'lua> {
fn to_lua(self, _: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::Table(self))
}
}
impl<'lua> FromLua<'lua> for Table<'lua> {
fn from_lua(value: Value<'lua>, _: &'lua Lua) -> Result<Table<'lua>> {
match value {
Value::Table(table) => Ok(table),
_ => Err(Error::FromLuaConversionError {
from: value.type_name(),
to: "table",
message: None,
}),
}
}
}
impl<'lua> ToLua<'lua> for Function<'lua> {
fn to_lua(self, _: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::Function(self))
}
}
impl<'lua> FromLua<'lua> for Function<'lua> {
fn from_lua(value: Value<'lua>, _: &'lua Lua) -> Result<Function<'lua>> {
match value {
Value::Function(table) => Ok(table),
_ => Err(Error::FromLuaConversionError {
from: value.type_name(),
to: "function",
message: None,
}),
}
}
}
impl<'lua> ToLua<'lua> for Thread<'lua> {
fn to_lua(self, _: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::Thread(self))
}
}
impl<'lua> FromLua<'lua> for Thread<'lua> {
fn from_lua(value: Value<'lua>, _: &'lua Lua) -> Result<Thread<'lua>> {
match value {
Value::Thread(t) => Ok(t),
_ => Err(Error::FromLuaConversionError {
from: value.type_name(),
to: "thread",
message: None,
}),
}
}
}
impl<'lua> ToLua<'lua> for AnyUserData<'lua> {
fn to_lua(self, _: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::UserData(self))
}
}
impl<'lua> FromLua<'lua> for AnyUserData<'lua> {
fn from_lua(value: Value<'lua>, _: &'lua Lua) -> Result<AnyUserData<'lua>> {
match value {
Value::UserData(ud) => Ok(ud),
_ => Err(Error::FromLuaConversionError {
from: value.type_name(),
to: "userdata",
message: None,
}),
}
}
}
impl<'lua, T: 'static + MaybeSend + UserData> ToLua<'lua> for T {
fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::UserData(lua.create_userdata(self)?))
}
}
impl<'lua, T: 'static + UserData + Clone> FromLua<'lua> for T {
fn from_lua(value: Value<'lua>, _: &'lua Lua) -> Result<T> {
match value {
Value::UserData(ud) => Ok(ud.borrow::<T>()?.clone()),
_ => Err(Error::FromLuaConversionError {
from: value.type_name(),
to: "userdata",
message: None,
}),
}
}
}
impl<'lua> ToLua<'lua> for Error {
fn to_lua(self, _: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::Error(self))
}
}
impl<'lua> FromLua<'lua> for Error {
fn from_lua(value: Value<'lua>, lua: &'lua Lua) -> Result<Error> {
match value {
Value::Error(err) => Ok(err),
val => Ok(Error::RuntimeError(
lua.coerce_string(val)?
.and_then(|s| Some(s.to_str().ok()?.to_owned()))
.unwrap_or_else(|| "<unprintable error>".to_owned()),
)),
}
}
}
impl<'lua> ToLua<'lua> for bool {
    /// Wraps the flag in `Value::Boolean`.
    fn to_lua(self, _lua: &'lua Lua) -> Result<Value<'lua>> {
        let wrapped = Value::Boolean(self);
        Ok(wrapped)
    }
}

impl<'lua> FromLua<'lua> for bool {
    /// Applies Lua truthiness: `nil` and `false` are false, everything else is true.
    fn from_lua(v: Value<'lua>, _lua: &'lua Lua) -> Result<Self> {
        let falsy = matches!(v, Value::Nil | Value::Boolean(false));
        Ok(!falsy)
    }
}
impl<'lua> ToLua<'lua> for LightUserData {
fn to_lua(self, _: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::LightUserData(self))
}
}
impl<'lua> FromLua<'lua> for LightUserData {
fn from_lua(value: Value<'lua>, _: &'lua Lua) -> Result<Self> {
match value {
Value::LightUserData(ud) => Ok(ud),
_ => Err(Error::FromLuaConversionError {
from: value.type_name(),
to: "light userdata",
message: None,
}),
}
}
}
impl<'lua> ToLua<'lua> for StdString {
    /// Copies the Rust string into a new Lua string.
    fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
        lua.create_string(&self).map(Value::String)
    }
}

impl<'lua> FromLua<'lua> for StdString {
    /// Coerces the value to a Lua string and copies it out as UTF-8.
    ///
    /// Fails when the value cannot be coerced, or when `to_str` rejects the bytes.
    fn from_lua(value: Value<'lua>, lua: &'lua Lua) -> Result<Self> {
        let source_type = value.type_name();
        let lua_str = lua
            .coerce_string(value)?
            .ok_or_else(|| Error::FromLuaConversionError {
                from: source_type,
                to: "String",
                message: Some("expected string or number".to_string()),
            })?;
        let text = lua_str.to_str()?;
        Ok(text.to_owned())
    }
}
impl<'lua> ToLua<'lua> for &str {
    /// Copies the string slice into a new Lua string.
    fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
        lua.create_string(self).map(Value::String)
    }
}

impl<'lua> ToLua<'lua> for Cow<'_, str> {
    /// Copies the string's bytes into a new Lua string, borrowed or owned alike.
    fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
        let bytes: &[u8] = self.as_bytes();
        lua.create_string(bytes).map(Value::String)
    }
}
impl<'lua> ToLua<'lua> for CString {
fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::String(lua.create_string(self.as_bytes())?))
}
}
impl<'lua> FromLua<'lua> for CString {
fn from_lua(value: Value<'lua>, lua: &'lua Lua) -> Result<Self> {
let ty = value.type_name();
let string = lua
.coerce_string(value)?
.ok_or_else(|| Error::FromLuaConversionError {
from: ty,
to: "CString",
message: Some("expected string or number".to_string()),
})?;
match CStr::from_bytes_with_nul(string.as_bytes_with_nul()) {
Ok(s) => Ok(s.into()),
Err(_) => Err(Error::FromLuaConversionError {
from: ty,
to: "CString",
message: Some("invalid C-style string".to_string()),
}),
}
}
}
impl<'lua> ToLua<'lua> for &CStr {
    /// Copies the bytes (without the trailing NUL) into a new Lua string.
    fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
        let bytes: &[u8] = self.to_bytes();
        lua.create_string(bytes).map(Value::String)
    }
}

impl<'lua> ToLua<'lua> for Cow<'_, CStr> {
    /// Copies the bytes (without the trailing NUL) into a new Lua string.
    fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
        let bytes: &[u8] = self.to_bytes();
        lua.create_string(bytes).map(Value::String)
    }
}
impl<'lua> ToLua<'lua> for BString {
    /// Copies the byte string into a new Lua string.
    fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
        lua.create_string(&self).map(Value::String)
    }
}
impl<'lua> FromLua<'lua> for BString {
    /// Coerces the value to a Lua string and copies its raw bytes into a `BString`.
    ///
    /// No UTF-8 validation is performed. Fails with `FromLuaConversionError`
    /// when the value cannot be coerced to a string.
    fn from_lua(value: Value<'lua>, lua: &'lua Lua) -> Result<Self> {
        let ty = value.type_name();
        let lua_str = lua
            .coerce_string(value)?
            .ok_or_else(|| Error::FromLuaConversionError {
                from: ty,
                // Report the actual Rust target type, as the `CString` impl does.
                to: "BString",
                message: Some("expected string or number".to_string()),
            })?;
        Ok(BString::from(lua_str.as_bytes().to_vec()))
    }
}
impl<'lua> ToLua<'lua> for &BStr {
    /// Copies the byte string slice into a new Lua string.
    fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
        lua.create_string(&self).map(Value::String)
    }
}
macro_rules! lua_convert_int {
($x:ty) => {
impl<'lua> ToLua<'lua> for $x {
fn to_lua(self, _: &'lua Lua) -> Result<Value<'lua>> {
if let Some(i) = cast(self) {
Ok(Value::Integer(i))
} else {
cast(self)
.ok_or_else(|| Error::ToLuaConversionError {
from: stringify!($x),
to: "number",
message: Some("out of range".to_owned()),
})
.map(Value::Number)
}
}
}
impl<'lua> FromLua<'lua> for $x {
fn from_lua(value: Value<'lua>, lua: &'lua Lua) -> Result<Self> {
let ty = value.type_name();
(if let Some(i) = lua.coerce_integer(value.clone())? {
cast(i)
} else {
cast(lua.coerce_number(value)?.ok_or_else(|| {
Error::FromLuaConversionError {
from: ty,
to: stringify!($x),
message: Some(
"expected number or string coercible to number".to_string(),
),
}
})?)
})
.ok_or_else(|| Error::FromLuaConversionError {
from: ty,
to: stringify!($x),
message: Some("out of range".to_owned()),
})
}
}
};
}
lua_convert_int!(i8);
lua_convert_int!(u8);
lua_convert_int!(i16);
lua_convert_int!(u16);
lua_convert_int!(i32);
lua_convert_int!(u32);
lua_convert_int!(i64);
lua_convert_int!(u64);
//lua_convert_int!(i128);
//lua_convert_int!(u128);
lua_convert_int!(isize);
lua_convert_int!(usize);
macro_rules! lua_convert_float {
($x:ty) => {
impl<'lua> ToLua<'lua> for $x {
fn to_lua(self, _: &'lua Lua) -> Result<Value<'lua>> {
cast(self)
.ok_or_else(|| Error::ToLuaConversionError {
from: stringify!($x),
to: "number",
message: Some("out of range".to_string()),
})
.map(Value::Number)
}
}
impl<'lua> FromLua<'lua> for $x {
fn from_lua(value: Value<'lua>, lua: &'lua Lua) -> Result<Self> {
let ty = value.type_name();
lua.coerce_number(value)?
.ok_or_else(|| Error::FromLuaConversionError {
from: ty,
to: stringify!($x),
message: Some("expected number or string coercible to number".to_string()),
})
.and_then(|n| {
cast(n).ok_or_else(|| Error::FromLuaConversionError {
from: ty,
to: stringify!($x),
message: Some("number out of range".to_string()),
})
})
}
}
};
}
lua_convert_float!(f32);
lua_convert_float!(f64);
impl<'lua, T: Clone + ToLua<'lua>> ToLua<'lua> for &[T] {
    /// Builds a Lua sequence table from clones of the slice's elements.
    fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
        let items = self.iter().cloned();
        lua.create_sequence_from(items).map(Value::Table)
    }
}
/// Implements `ToLua` for `[T; N]` and `&[T; N]` for every listed length `N`.
macro_rules! lua_convert_array {
    ($($N:literal)+) => {
        $(
            impl<'lua, T: Clone + ToLua<'lua>> ToLua<'lua> for [T; $N] {
                /// Delegates to the `&[T; N]` implementation.
                fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
                    <&[T; $N] as ToLua<'lua>>::to_lua(&self, lua)
                }
            }

            impl<'lua, T: Clone + ToLua<'lua>> ToLua<'lua> for &[T; $N] {
                /// Builds a Lua sequence table from clones of the array's elements.
                fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
                    let items = self.iter().cloned();
                    lua.create_sequence_from(items).map(Value::Table)
                }
            }
        )+
    }
}

lua_convert_array! {
    0 1 2 3 4 5 6 7 8 9
    10 11 12 13 14 15 16 17 18 19
    20 21 22 23 24 25 26 27 28 29
    30 31 32
}
impl<'lua, T: ToLua<'lua>> ToLua<'lua> for Vec<T> {
fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::Table(lua.create_sequence_from(self)?))
}
}
impl<'lua, T: FromLua<'lua>> FromLua<'lua> for Vec<T> {
fn from_lua(value: Value<'lua>, _: &'lua Lua) -> Result<Self> {
if let Value::Table(table) = value {
table.sequence_values().collect()
} else {
Err(Error::FromLuaConversionError {
from: value.type_name(),
to: "Vec",
message: Some("expected table".to_string()),
})
}
}
}
impl<'lua, K: Eq + Hash + ToLua<'lua>, V: ToLua<'lua>, S: BuildHasher> ToLua<'lua>
for HashMap<K, V, S>
{
fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::Table(lua.create_table_from(self)?))
}
}
impl<'lua, K: Eq + Hash + FromLua<'lua>, V: FromLua<'lua>, S: BuildHasher + Default> FromLua<'lua>
for HashMap<K, V, S>
{
fn from_lua(value: Value<'lua>, _: &'lua Lua) -> Result<Self> {
if let Value::Table(table) = value {
table.pairs().collect()
} else {
Err(Error::FromLuaConversionError {
from: value.type_name(),
to: "HashMap",
message: Some("expected table".to_string()),
})
}
}
}
impl<'lua, K: Ord + ToLua<'lua>, V: ToLua<'lua>> ToLua<'lua> for BTreeMap<K, V> {
fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::Table(lua.create_table_from(self)?))
}
}
impl<'lua, K: Ord + FromLua<'lua>, V: FromLua<'lua>> FromLua<'lua> for BTreeMap<K, V> {
fn from_lua(value: Value<'lua>, _: &'lua Lua) -> Result<Self> {
if let Value::Table(table) = value {
table.pairs().collect()
} else {
Err(Error::FromLuaConversionError {
from: value.type_name(),
to: "BTreeMap",
message: Some("expected table".to_string()),
})
}
}
}
impl<'lua, T: Eq + Hash + ToLua<'lua>, S: BuildHasher> ToLua<'lua> for HashSet<T, S> {
fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::Table(lua.create_table_from(
self.into_iter().map(|val| (val, true)),
)?))
}
}
impl<'lua, T: Eq + Hash + FromLua<'lua>, S: BuildHasher + Default> FromLua<'lua> for HashSet<T, S> {
fn from_lua(value: Value<'lua>, _: &'lua Lua) -> Result<Self> {
if let Value::Table(table) = value {
table
.pairs::<T, Value<'lua>>()
.map(|res| res.map(|(k, _)| k))
.collect()
} else {
Err(Error::FromLuaConversionError {
from: value.type_name(),
to: "HashSet",
message: Some("expected table".to_string()),
})
}
}
}
impl<'lua, T: Ord + ToLua<'lua>> ToLua<'lua> for BTreeSet<T> {
fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
Ok(Value::Table(lua.create_table_from(
self.into_iter().map(|val| (val, true)),
)?))
}
}
impl<'lua, T: Ord + FromLua<'lua>> FromLua<'lua> for BTreeSet<T> {
fn from_lua(value: Value<'lua>, _: &'lua Lua) -> Result<Self> {
if let Value::Table(table) = value {
table
.pairs::<T, Value<'lua>>()
.map(|res| res.map(|(k, _)| k))
.collect()
} else {
Err(Error::FromLuaConversionError {
from: value.type_name(),
to: "BTreeSet",
message: Some("expected table".to_string()),
})
}
}
}
impl<'lua, T: ToLua<'lua>> ToLua<'lua> for Option<T> {
fn to_lua(self, lua: &'lua Lua) -> Result<Value<'lua>> {
match self {
Some(val) => val.to_lua(lua),
None => Ok(Nil),
}
}
}
impl<'lua, T: FromLua<'lua>> FromLua<'lua> for Option<T> {
fn from_lua(value: Value<'lua>, lua: &'lua Lua) -> Result<Self> {
match value {
Nil => Ok(None),
value => Ok(Some(T::from_lua(value, lua)?)),
}
}
}

View File

@ -0,0 +1,329 @@
use std::error::Error as StdError;
use std::fmt;
use std::io::Error as IoError;
use std::net::AddrParseError;
use std::result::Result as StdResult;
use std::str::Utf8Error;
use std::string::String as StdString;
use std::sync::Arc;
/// Error type returned by `mlua` methods.
#[derive(Debug, Clone)]
pub enum Error {
/// Syntax error while parsing Lua source code.
SyntaxError {
/// The error message as returned by Lua.
message: StdString,
/// `true` if the error can likely be fixed by appending more input to the source code.
///
/// This is useful for implementing REPLs as they can query the user for more input if this
/// is set.
incomplete_input: bool,
},
/// Lua runtime error, aka `LUA_ERRRUN`.
///
/// The Lua VM returns this error when a builtin operation is performed on incompatible types.
/// Among other things, this includes invoking operators on wrong types (such as calling or
/// indexing a `nil` value).
RuntimeError(StdString),
/// Lua memory error, aka `LUA_ERRMEM`
///
/// The Lua VM returns this error when the allocator does not return the requested memory, aka
/// it is an out-of-memory error.
MemoryError(StdString),
/// Lua garbage collector error, aka `LUA_ERRGCMM`.
///
/// The Lua VM returns this error when there is an error running a `__gc` metamethod.
#[cfg(any(feature = "lua53", feature = "lua52"))]
GarbageCollectorError(StdString),
/// Potentially unsafe action in safe mode.
SafetyError(StdString),
/// Setting memory limit is not available.
///
/// This error can only happen when Lua state was not created by us and does not have the
/// custom allocator attached.
MemoryLimitNotAvailable,
/// Main thread is not available.
///
/// This error can only happen in Lua5.1/LuaJIT module mode, when a module is loaded within
/// a coroutine. These Lua versions do not have the `LUA_RIDX_MAINTHREAD` registry key.
MainThreadNotAvailable,
/// A mutable callback has triggered Lua code that has called the same mutable callback again.
///
/// This is an error because a mutable callback can only be borrowed mutably once.
RecursiveMutCallback,
/// Either a callback or a userdata method has been called, but the callback or userdata has
/// been destructed.
///
/// This can happen either due to being destructed in a previous __gc, or due to being
/// destructed from exiting a `Lua::scope` call.
CallbackDestructed,
/// Not enough stack space to place arguments to Lua functions or return values from callbacks.
///
/// Due to the way `mlua` works, it should not be directly possible to run out of stack space
/// during normal use. The only way that this error can be triggered is if a `Function` is
/// called with a huge number of arguments, or a rust callback returns a huge number of return
/// values.
StackError,
/// Too many arguments to `Function::bind`
BindError,
/// A Rust value could not be converted to a Lua value.
ToLuaConversionError {
/// Name of the Rust type that could not be converted.
from: &'static str,
/// Name of the Lua type that could not be created.
to: &'static str,
/// A message indicating why the conversion failed in more detail.
message: Option<StdString>,
},
/// A Lua value could not be converted to the expected Rust type.
FromLuaConversionError {
/// Name of the Lua type that could not be converted.
from: &'static str,
/// Name of the Rust type that could not be created.
to: &'static str,
/// A string containing more detailed error information.
message: Option<StdString>,
},
/// [`Thread::resume`] was called on an inactive coroutine.
///
/// A coroutine is inactive if its main function has returned or if an error has occurred inside
/// the coroutine.
///
/// [`Thread::status`] can be used to check if the coroutine can be resumed without causing this
/// error.
///
/// [`Thread::resume`]: struct.Thread.html#method.resume
/// [`Thread::status`]: struct.Thread.html#method.status
CoroutineInactive,
/// An [`AnyUserData`] is not the expected type in a borrow.
///
/// This error can only happen when manually using [`AnyUserData`], or when implementing
/// metamethods for binary operators. Refer to the documentation of [`UserDataMethods`] for
/// details.
///
/// [`AnyUserData`]: struct.AnyUserData.html
/// [`UserDataMethods`]: trait.UserDataMethods.html
UserDataTypeMismatch,
/// An [`AnyUserData`] borrow failed because it has been destructed.
///
/// This error can happen either due to being destructed in a previous __gc, or due to being
/// destructed from exiting a `Lua::scope` call.
///
/// [`AnyUserData`]: struct.AnyUserData.html
UserDataDestructed,
/// An [`AnyUserData`] immutable borrow failed because it is already borrowed mutably.
///
/// This error can occur when a method on a [`UserData`] type calls back into Lua, which then
/// tries to call a method on the same [`UserData`] type. Consider restructuring your API to
/// prevent these errors.
///
/// [`AnyUserData`]: struct.AnyUserData.html
/// [`UserData`]: trait.UserData.html
UserDataBorrowError,
/// An [`AnyUserData`] mutable borrow failed because it is already borrowed.
///
/// This error can occur when a method on a [`UserData`] type calls back into Lua, which then
/// tries to call a method on the same [`UserData`] type. Consider restructuring your API to
/// prevent these errors.
///
/// [`AnyUserData`]: struct.AnyUserData.html
/// [`UserData`]: trait.UserData.html
UserDataBorrowMutError,
/// A `RegistryKey` produced from a different Lua state was used.
MismatchedRegistryKey,
/// A Rust callback returned `Err`, raising the contained `Error` as a Lua error.
CallbackError {
/// Lua call stack backtrace.
traceback: StdString,
/// Original error returned by the Rust code.
cause: Arc<Error>,
},
/// Serialization error.
#[cfg(feature = "serialize")]
#[cfg_attr(docsrs, doc(cfg(feature = "serialize")))]
SerializeError(StdString),
/// Deserialization error.
#[cfg(feature = "serialize")]
#[cfg_attr(docsrs, doc(cfg(feature = "serialize")))]
DeserializeError(StdString),
/// A custom error.
///
/// This can be used for returning user-defined errors from callbacks.
///
/// Returning `Err(ExternalError(...))` from a Rust callback will raise the error as a Lua
/// error. The Rust code that originally invoked the Lua code then receives a `CallbackError`,
/// from which the original error (and a stack traceback) can be recovered.
#[cfg(feature = "send")]
ExternalError(Arc<dyn StdError + Send + Sync>),
// Same variant without the `Send + Sync` bounds when the `send` feature is disabled.
#[cfg(not(feature = "send"))]
ExternalError(Arc<dyn StdError>),
}
/// A specialized `Result` type used by `mlua`'s API.
pub type Result<T> = StdResult<T, Error>;
impl fmt::Display for Error {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::SyntaxError { ref message, .. } => write!(fmt, "syntax error: {}", message),
Error::RuntimeError(ref msg) => write!(fmt, "runtime error: {}", msg),
Error::MemoryError(ref msg) => {
write!(fmt, "memory error: {}", msg)
}
#[cfg(any(feature = "lua53", feature = "lua52"))]
Error::GarbageCollectorError(ref msg) => {
write!(fmt, "garbage collector error: {}", msg)
}
Error::SafetyError(ref msg) => {
write!(fmt, "safety error: {}", msg)
},
Error::MemoryLimitNotAvailable => {
write!(fmt, "setting memory limit is not available")
}
Error::MainThreadNotAvailable => {
write!(fmt, "main thread is not available in Lua 5.1")
}
Error::RecursiveMutCallback => write!(fmt, "mutable callback called recursively"),
Error::CallbackDestructed => write!(
fmt,
"a destructed callback or destructed userdata method was called"
),
Error::StackError => write!(
fmt,
"out of Lua stack, too many arguments to a Lua function or too many return values from a callback"
),
Error::BindError => write!(
fmt,
"too many arguments to Function::bind"
),
Error::ToLuaConversionError {
from,
to,
ref message,
} => {
write!(fmt, "error converting {} to Lua {}", from, to)?;
match *message {
None => Ok(()),
Some(ref message) => write!(fmt, " ({})", message),
}
}
Error::FromLuaConversionError {
from,
to,
ref message,
} => {
write!(fmt, "error converting Lua {} to {}", from, to)?;
match *message {
None => Ok(()),
Some(ref message) => write!(fmt, " ({})", message),
}
}
Error::CoroutineInactive => write!(fmt, "cannot resume inactive coroutine"),
Error::UserDataTypeMismatch => write!(fmt, "userdata is not expected type"),
Error::UserDataDestructed => write!(fmt, "userdata has been destructed"),
Error::UserDataBorrowError => write!(fmt, "userdata already mutably borrowed"),
Error::UserDataBorrowMutError => write!(fmt, "userdata already borrowed"),
Error::MismatchedRegistryKey => {
write!(fmt, "RegistryKey used from different Lua state")
}
Error::CallbackError { ref traceback, .. } => {
write!(fmt, "callback error: {}", traceback)
}
#[cfg(feature = "serialize")]
Error::SerializeError(ref err) => {
write!(fmt, "serialize error: {}", err)
},
#[cfg(feature = "serialize")]
Error::DeserializeError(ref err) => {
write!(fmt, "deserialize error: {}", err)
},
Error::ExternalError(ref err) => write!(fmt, "{}", err),
}
}
}
impl StdError for Error {
fn source(&self) -> Option<&(dyn StdError + 'static)> {
match *self {
Error::CallbackError { ref cause, .. } => Some(cause.as_ref()),
Error::ExternalError(ref err) => err.source(),
_ => None,
}
}
}
impl Error {
/// Wraps any value convertible into a boxed standard error in `Error::ExternalError`.
///
/// With the `send` feature enabled the boxed error must also be `Send + Sync`.
#[cfg(feature = "send")]
pub fn external<T: Into<Box<dyn StdError + Send + Sync>>>(err: T) -> Error {
// First `into()` boxes the error, the second converts the `Box` into the `Arc` the variant stores.
Error::ExternalError(err.into().into())
}
/// Wraps any value convertible into a boxed standard error in `Error::ExternalError`.
///
/// Variant used when the `send` feature is disabled (no `Send + Sync` bound).
#[cfg(not(feature = "send"))]
pub fn external<T: Into<Box<dyn StdError>>>(err: T) -> Error {
// First `into()` boxes the error, the second converts the `Box` into the `Arc` the variant stores.
Error::ExternalError(err.into().into())
}
}
/// Conversion of an arbitrary error value into an mlua `Error`.
pub trait ExternalError {
/// Consumes `self` and returns it wrapped as an mlua `Error`.
fn to_lua_err(self) -> Error;
}
// Blanket impl (`send` feature): anything convertible into a boxed `Send + Sync` error.
#[cfg(feature = "send")]
impl<E: Into<Box<dyn StdError + Send + Sync>>> ExternalError for E {
fn to_lua_err(self) -> Error {
Error::external(self)
}
}
// Blanket impl (no `send` feature): anything convertible into a boxed error.
#[cfg(not(feature = "send"))]
impl<E: Into<Box<dyn StdError>>> ExternalError for E {
fn to_lua_err(self) -> Error {
Error::external(self)
}
}
/// Extension trait that converts the error half of a `Result` into an mlua `Error`.
pub trait ExternalResult<T> {
    /// Maps `Err(e)` to `Err(e.to_lua_err())`; `Ok` values pass through unchanged.
    fn to_lua_err(self) -> Result<T>;
}
impl<T, E> ExternalResult<T> for StdResult<T, E>
where
    E: ExternalError,
{
    fn to_lua_err(self) -> Result<T> {
        match self {
            Ok(value) => Ok(value),
            Err(err) => Err(err.to_lua_err()),
        }
    }
}
/// Lets `?` turn an `AddrParseError` into an mlua `Error` via `Error::external`.
impl std::convert::From<AddrParseError> for Error {
fn from(err: AddrParseError) -> Self {
Error::external(err)
}
}
/// Lets `?` turn an `IoError` into an mlua `Error` via `Error::external`.
impl std::convert::From<IoError> for Error {
fn from(err: IoError) -> Self {
Error::external(err)
}
}
/// Lets `?` turn a `Utf8Error` into an mlua `Error` via `Error::external`.
impl std::convert::From<Utf8Error> for Error {
fn from(err: Utf8Error) -> Self {
Error::external(err)
}
}
/// Allows `Error` to serve as a serde serializer error type (`serialize` feature only).
#[cfg(feature = "serialize")]
impl serde::ser::Error for Error {
// Stores the rendered message in `Error::SerializeError`.
fn custom<T: fmt::Display>(msg: T) -> Self {
Self::SerializeError(msg.to_string())
}
}
/// Allows `Error` to serve as a serde deserializer error type (`serialize` feature only).
#[cfg(feature = "serialize")]
impl serde::de::Error for Error {
// Stores the rendered message in `Error::DeserializeError`.
fn custom<T: fmt::Display>(msg: T) -> Self {
Self::DeserializeError(msg.to_string())
}
}

View File

@ -0,0 +1,809 @@
// The MIT License (MIT)
//
// Copyright (c) 2019-2021 A. Orlenko
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
// Based on github.com/keplerproject/lua-compat-5.3
#![allow(clippy::needless_return)]
use std::ffi::CStr;
use std::mem;
use std::os::raw::{c_char, c_int, c_void};
use std::ptr;
use super::lauxlib::{
luaL_callmeta, luaL_error, luaL_getmetafield_old, luaL_loadbuffer, luaL_newmetatable_old,
};
#[cfg(any(feature = "lua51", feature = "luajit"))]
use super::lauxlib::{luaL_Reg, luaL_checktype, luaL_getmetatable};
#[cfg(feature = "lua52")]
use super::lauxlib::{luaL_checkstack, luaL_getsubtable};
use super::lua::{
self, lua_CFunction, lua_Debug, lua_Integer, lua_Number, lua_State, lua_Writer, lua_call,
lua_createtable, lua_dump_old, lua_error, lua_getfield_old, lua_getstack, lua_gettable_old,
lua_gettop, lua_insert, lua_isstring, lua_istable, lua_newuserdata, lua_pop, lua_pushboolean,
lua_pushcfunction, lua_pushfstring, lua_pushinteger, lua_pushliteral, lua_pushlstring_old,
lua_pushnil, lua_pushnumber, lua_pushthread, lua_pushvalue, lua_rawget_old, lua_rawgeti_old,
lua_rawset, lua_replace, lua_setfield, lua_setglobal, lua_setmetatable, lua_settable,
lua_toboolean, lua_tointeger, lua_tolstring, lua_tonumber, lua_topointer, lua_tostring,
lua_touserdata, lua_type, lua_typename,
};
#[cfg(any(feature = "lua51", feature = "luajit"))]
use super::lua::{
lua_checkstack, lua_concat, lua_equal, lua_getfenv, lua_getinfo, lua_getmetatable,
lua_isnumber, lua_lessthan, lua_newtable, lua_next, lua_objlen, lua_pushcclosure,
lua_pushlightuserdata, lua_pushstring_old, lua_rawequal, lua_remove, lua_resume_old,
lua_setfenv, lua_settop, LUA_OPADD, LUA_OPUNM,
};
#[cfg(feature = "lua52")]
use super::lua::{
lua_absindex, lua_getglobal_old, lua_getuservalue_old, lua_pushstring, lua_rawgetp_old,
lua_rawsetp, lua_tonumberx,
};
/// Reverses, in place, the stack slots in the inclusive index range `a..=b`.
///
/// Each step pushes copies of the two outermost values and stores them back swapped
/// via `lua_replace`, then moves both ends one slot inward.
unsafe fn compat53_reverse(L: *mut lua_State, a: c_int, b: c_int) {
    let (mut lo, mut hi) = (a, b);
    loop {
        if lo >= hi {
            break;
        }
        lua_pushvalue(L, lo);
        lua_pushvalue(L, hi);
        lua_replace(L, lo);
        lua_replace(L, hi);
        lo += 1;
        hi -= 1;
    }
}
const COMPAT53_LEVELS1: c_int = 12; // size of the first part of the stack
const COMPAT53_LEVELS2: c_int = 10; // size of the second part of the stack
/// Counts the call-stack levels of `L` that `lua_getstack` reports as present.
///
/// Doubles a probe level until `lua_getstack` fails, then bisects between the last
/// level that succeeded and the first that failed.
unsafe fn compat53_countlevels(L: *mut lua_State) -> c_int {
    let mut ar: lua_Debug = mem::zeroed();
    let mut lo: c_int = 1;
    let mut hi: c_int = 1;
    // Exponential probe: find a level that does not exist.
    while lua_getstack(L, hi, &mut ar) != 0 {
        lo = hi;
        hi *= 2;
    }
    // Bisection between the known-good and known-bad levels.
    while lo < hi {
        let mid = (lo + hi) / 2;
        match lua_getstack(L, mid, &mut ar) {
            0 => hi = mid,
            _ => lo = mid + 1,
        }
    }
    hi - 1
}
/// Checks that a chunk kind is permitted by a load `mode` string.
///
/// When `mode` is non-null and does not contain the first character of `modename`,
/// an error message is pushed onto the stack and `err` is returned. Otherwise
/// returns `LUA_OK`.
unsafe fn compat53_checkmode(
    L: *mut lua_State,
    mode: *const c_char,
    modename: *const c_char,
    err: c_int,
) -> c_int {
    /// Minimal `strchr`: pointer to the first occurrence of `c` in the NUL-terminated
    /// string `s` (the terminator itself matches when `c` is 0), or null if absent.
    unsafe fn strchr(s: *const c_char, c: c_char) -> *const c_char {
        let mut cursor = s;
        loop {
            let ch = *cursor;
            if ch == c {
                return cursor;
            }
            if ch == 0 {
                return ptr::null();
            }
            cursor = cursor.add(1);
        }
    }

    // No mode restriction, or the kind's initial letter is listed: allowed.
    if mode.is_null() || !strchr(mode, *modename).is_null() {
        return lua::LUA_OK;
    }
    lua_pushfstring(
        L,
        cstr!("attempt to load a %s chunk (mode is '%s')"),
        modename,
        mode,
    );
    err
}
/// Searches the table on top of the stack for a string-keyed field whose value is
/// raw-equal to the value at stack index `objidx`, recursing into nested values up to
/// `level` tables deep.
///
/// Returns 1 with the field name (dotted for nested hits, e.g. `outer.inner`) left on
/// top of the stack, or 0 when nothing was found.
#[cfg(any(feature = "lua51", feature = "luajit"))]
unsafe fn compat53_findfield(L: *mut lua_State, objidx: c_int, level: c_int) -> c_int {
if level == 0 || lua_istable(L, -1) == 0 {
return 0; // not found
}
lua_pushnil(L); // start 'next' loop
while lua_next(L, -2) != 0 {
// for each pair in table
if lua_type(L, -2) == lua::LUA_TSTRING {
// ignore non-string keys
if lua_rawequal(L, objidx, -1) != 0 {
// found object?
lua_pop(L, 1); // remove value (but keep name)
return 1;
} else if compat53_findfield(L, objidx, level - 1) != 0 {
// try recursively
lua_remove(L, -2); // remove table (but keep name)
lua_pushliteral(L, ".");
lua_insert(L, -2); // place '.' between the two names
lua_concat(L, 3);
return 1;
}
}
lua_pop(L, 1); // remove value
}
return 0; // not found
}
/// Tries to find a global name for the function described by `ar`.
///
/// Pushes the function (via `lua_getinfo` with "f") and the globals table, then searches
/// two table levels deep. On success the name is left on the stack and 1 is returned;
/// otherwise the stack top is restored and 0 is returned.
#[cfg(any(feature = "lua51", feature = "luajit"))]
unsafe fn compat53_pushglobalfuncname(L: *mut lua_State, ar: *mut lua_Debug) -> c_int {
let top = lua_gettop(L);
lua_getinfo(L, cstr!("f"), ar); // push function
lua_pushvalue(L, lua::LUA_GLOBALSINDEX);
if compat53_findfield(L, top + 1, 2) != 0 {
lua_copy(L, -1, top + 1); // move name to proper place
lua_pop(L, 2); // remove pushed values
return 1;
} else {
lua_settop(L, top); // remove function and global table
return 0;
}
}
/// Pushes a description of the function in `ar` for use in a traceback line.
///
/// Preference order: the debug-info name, "main chunk", a global name for C functions
/// (or "?" if none is found), and finally `function <source:line>`.
#[cfg(any(feature = "lua51", feature = "luajit"))]
unsafe fn compat53_pushfuncname(L: *mut lua_State, ar: *mut lua_Debug) {
if *(*ar).namewhat != b'\0' as c_char {
// is there a name?
lua_pushfstring(L, cstr!("function '%s'"), (*ar).name);
} else if *(*ar).what == b'm' as c_char {
// main?
lua_pushliteral(L, "main chunk");
} else if *(*ar).what == b'C' as c_char {
if compat53_pushglobalfuncname(L, ar) != 0 {
lua_pushfstring(L, cstr!("function '%s'"), lua_tostring(L, -1));
lua_remove(L, -2); // remove name
} else {
lua_pushliteral(L, "?");
}
} else {
lua_pushfstring(
L,
cstr!("function <%s:%d>"),
(*ar).short_src.as_ptr(),
(*ar).linedefined,
);
}
}
/// Runs the Lua source `code` with the top `nargs` stack values as arguments, leaving
/// `nret` results on the stack.
///
/// The compiled chunk is cached in the registry, keyed by the address of `code`'s bytes,
/// so each distinct string is compiled at most once per registry. A load failure is
/// raised as a Lua error.
unsafe fn compat53_call_lua(L: *mut lua_State, code: &str, nargs: c_int, nret: c_int) {
lua_rawgetp(L, lua::LUA_REGISTRYINDEX, code.as_ptr() as *const c_void);
if lua_type(L, -1) != lua::LUA_TFUNCTION {
// Not cached yet: drop the placeholder, compile, and store a copy in the registry.
lua_pop(L, 1);
if luaL_loadbuffer(
L,
code.as_ptr() as *const c_char,
code.as_bytes().len(),
cstr!("=none"),
) != 0
{
lua_error(L);
}
lua_pushvalue(L, -1);
lua_rawsetp(L, lua::LUA_REGISTRYINDEX, code.as_ptr() as *const c_void);
}
// Move the function below its arguments before calling it.
lua_insert(L, -nargs - 1);
lua_call(L, nargs, nret);
}
//
// lua ported functions
//
/// Returns the pseudo-index of upvalue `i`, computed as `LUA_GLOBALSINDEX - i`.
#[cfg(any(feature = "lua51", feature = "luajit"))]
#[inline(always)]
pub fn lua_upvalueindex(i: c_int) -> c_int {
lua::LUA_GLOBALSINDEX - i
}
/// Converts a relative (negative) stack index into an absolute one.
///
/// Positive indices and pseudo-indices (at or below `LUA_REGISTRYINDEX`) are returned
/// unchanged.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_absindex(L: *mut lua_State, idx: c_int) -> c_int {
    let is_relative = idx < 0 && idx > lua::LUA_REGISTRYINDEX;
    if is_relative {
        idx + (lua_gettop(L) + 1)
    } else {
        idx
    }
}
/// Lua chunk used by `lua_arith`: receives `(op, a, b)` and applies the arithmetic
/// operator selected by the numeric code `op` (0 = add ... 6 = unary minus).
#[cfg(any(feature = "lua51", feature = "luajit"))]
static COMPAT53_ARITH_CODE: &str = r#"
local op,a,b=...
if op == 0 then return a+b
elseif op == 1 then return a-b
elseif op == 2 then return a*b
elseif op == 3 then return a/b
elseif op == 4 then return a%b
elseif op == 5 then return a^b
elseif op == 6 then return -a
end
"#;
/// Performs arithmetic operation `op` on the value(s) at the top of the stack by
/// delegating to the `COMPAT53_ARITH_CODE` Lua chunk; the result replaces the operands.
///
/// Raises a Lua error when `op` is outside `LUA_OPADD..=LUA_OPUNM`.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_arith(L: *mut lua_State, op: c_int) {
if op < LUA_OPADD || op > LUA_OPUNM {
luaL_error(L, cstr!("invalid 'op' argument for lua_arith"));
}
luaL_checkstack(L, 5, cstr!("not enough stack slots"));
if op == LUA_OPUNM {
// Unary minus has one operand; duplicate it so the chunk still receives (op, a, b).
lua_pushvalue(L, -1);
}
lua_pushnumber(L, op as lua_Number);
lua_insert(L, -3); // place the opcode below both operands
compat53_call_lua(L, COMPAT53_ARITH_CODE, 3, 1);
}
/// Rotates the stack elements between index `idx` and the top by `n` positions
/// (a negative `n` is first normalised by adding the element count).
///
/// Implemented as three in-place reversals; nothing happens unless the normalised `n`
/// lies strictly between 0 and the number of elements.
pub unsafe fn lua_rotate(L: *mut lua_State, mut idx: c_int, mut n: c_int) {
idx = lua_absindex(L, idx);
let n_elems = lua_gettop(L) - idx + 1;
if n < 0 {
n += n_elems;
}
if n > 0 && n < n_elems {
luaL_checkstack(L, 2, cstr!("not enough stack slots available"));
n = n_elems - n;
compat53_reverse(L, idx, idx + n - 1);
compat53_reverse(L, idx + n, idx + n_elems - 1);
compat53_reverse(L, idx, idx + n_elems - 1);
}
}
/// Copies the value at `fromidx` into slot `toidx` without changing the stack size.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_copy(L: *mut lua_State, fromidx: c_int, toidx: c_int) {
// Resolve the destination first: pushing the source would shift relative indices.
let abs_to = lua_absindex(L, toidx);
luaL_checkstack(L, 1, cstr!("not enough stack slots"));
lua_pushvalue(L, fromidx);
lua_replace(L, abs_to);
}
/// Returns 1 when the value at `idx` is a number whose float value differs from its
/// integer conversion by less than `lua_Number::EPSILON`, and 0 otherwise.
pub unsafe fn lua_isinteger(L: *mut lua_State, idx: c_int) -> c_int {
    if lua_type(L, idx) != lua::LUA_TNUMBER {
        return 0;
    }
    let as_float = lua_tonumber(L, idx);
    let as_int = lua_tointeger(L, idx);
    let delta = (as_float - as_int as lua_Number).abs();
    if delta < lua_Number::EPSILON {
        1
    } else {
        0
    }
}
/// Converts the value at index `i` to a number and, when `isnum` is non-null, reports
/// through it whether the conversion succeeded.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_tonumberx(L: *mut lua_State, i: c_int, isnum: *mut c_int) -> lua_Number {
let n = lua_tonumber(L, i);
if !isnum.is_null() {
// A non-zero result proves success; a zero result is ambiguous, so ask `lua_isnumber`.
*isnum = if n != 0.0 || lua_isnumber(L, i) != 0 {
1
} else {
0
};
}
return n;
}
// Implemented for Lua 5.2 as well
// See https://github.com/keplerproject/lua-compat-5.3/issues/40
/// Converts the value at index `i` to an integer.
///
/// The conversion counts as successful only when the value converts to a number and
/// that number differs from its integer cast by less than `lua_Number::EPSILON`.
/// On failure 0 is returned. When `isnum` is non-null it receives 1 or 0 accordingly.
pub unsafe fn lua_tointegerx(L: *mut lua_State, i: c_int, isnum: *mut c_int) -> lua_Integer {
    let mut converted = 0;
    let value = lua_tonumberx(L, i, &mut converted);
    let truncated = value as lua_Integer;
    let exact = converted != 0 && (value - truncated as lua_Number).abs() < lua_Number::EPSILON;
    if !isnum.is_null() {
        *isnum = if exact { 1 } else { 0 };
    }
    if exact {
        truncated
    } else {
        0
    }
}
/// Raw length of the value at `idx`; forwards directly to `lua_objlen`.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_rawlen(L: *mut lua_State, idx: c_int) -> usize {
lua_objlen(L, idx)
}
/// Compares the values at `idx1` and `idx2` using operator `op`.
///
/// `LUA_OPEQ` and `LUA_OPLT` map to `lua_equal` / `lua_lessthan`; `LUA_OPLE` is evaluated
/// by a small Lua chunk. Any other `op` raises a Lua error.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_compare(L: *mut lua_State, mut idx1: c_int, mut idx2: c_int, op: c_int) -> c_int {
match op {
lua::LUA_OPEQ => lua_equal(L, idx1, idx2),
lua::LUA_OPLT => lua_lessthan(L, idx1, idx2),
lua::LUA_OPLE => {
luaL_checkstack(L, 5, cstr!("not enough stack slots"));
// Resolve to absolute indices before pushing copies of the operands.
idx1 = lua_absindex(L, idx1);
idx2 = lua_absindex(L, idx2);
lua_pushvalue(L, idx1);
lua_pushvalue(L, idx2);
compat53_call_lua(L, "local a,b=...\nreturn a<=b\n", 2, 1);
let result = lua_toboolean(L, -1);
lua_pop(L, 1);
result
}
_ => luaL_error(L, cstr!("invalid 'op' argument for lua_compare")),
}
}
/// Pushes the `l`-byte string at `s` and returns a pointer to the pushed string
/// (obtained with `lua_tostring`). A zero length pushes the empty string without
/// reading `s`.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_pushlstring(L: *mut lua_State, s: *const c_char, l: usize) -> *const c_char {
if l == 0 {
lua_pushlstring_old(L, cstr!(""), 0);
} else {
lua_pushlstring_old(L, s, l);
}
lua_tostring(L, -1)
}
/// Lua 5.2 variant: same zero-length handling, returning whatever the underlying
/// `lua_pushlstring_old` returns.
#[cfg(feature = "lua52")]
pub unsafe fn lua_pushlstring(L: *mut lua_State, s: *const c_char, l: usize) -> *const c_char {
if l == 0 {
lua_pushlstring_old(L, cstr!(""), 0)
} else {
lua_pushlstring_old(L, s, l)
}
}
/// Pushes the C string `s` and returns a pointer to the pushed string, obtained with
/// `lua_tostring`.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_pushstring(L: *mut lua_State, s: *const c_char) -> *const c_char {
lua_pushstring_old(L, s);
lua_tostring(L, -1)
}
/// Pushes the global named `var` and returns the type of the pushed value.
#[cfg(feature = "lua52")]
pub unsafe fn lua_getglobal(L: *mut lua_State, var: *const c_char) -> c_int {
lua_getglobal_old(L, var);
lua_type(L, -1)
}
/// Calls `lua_gettable_old` on the table at `idx` and returns the type of the value now
/// on top of the stack.
pub unsafe fn lua_gettable(L: *mut lua_State, idx: c_int) -> c_int {
lua_gettable_old(L, idx);
lua_type(L, -1)
}
/// Pushes field `k` of the value at `idx` and returns the type of the pushed value.
pub unsafe fn lua_getfield(L: *mut lua_State, idx: c_int, k: *const c_char) -> c_int {
lua_getfield_old(L, idx, k);
lua_type(L, -1)
}
/// Pushes `t[n]` for the value `t` at `idx` (through `lua_gettable`) and returns the
/// type of the pushed value.
pub unsafe fn lua_geti(L: *mut lua_State, mut idx: c_int, n: lua_Integer) -> c_int {
// Make the index absolute before pushing the key shifts relative positions.
idx = lua_absindex(L, idx);
lua_pushinteger(L, n);
lua_gettable(L, idx);
lua_type(L, -1)
}
// A new version which returns c_int
/// Calls `lua_rawget_old` on the table at `idx` and returns the type of the value now
/// on top of the stack.
pub unsafe fn lua_rawget(L: *mut lua_State, idx: c_int) -> c_int {
lua_rawget_old(L, idx);
lua_type(L, -1)
}
// A new version which returns c_int
/// Calls `lua_rawgeti_old` with index `n` on the table at `idx` and returns the type of
/// the value now on top of the stack.
pub unsafe fn lua_rawgeti(L: *mut lua_State, idx: c_int, n: lua_Integer) -> c_int {
lua_rawgeti_old(L, idx, n);
lua_type(L, -1)
}
/// Raw lookup in the table at `idx` using the pointer `p` (as light userdata) as key;
/// returns the type of the pushed value.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_rawgetp(L: *mut lua_State, idx: c_int, p: *const c_void) -> c_int {
let abs_i = lua_absindex(L, idx);
lua_pushlightuserdata(L, p as *mut c_void);
lua_rawget(L, abs_i);
lua_type(L, -1)
}
/// Lua 5.2 variant: delegates to `lua_rawgetp_old` and returns the type of the pushed
/// value.
#[cfg(feature = "lua52")]
pub unsafe fn lua_rawgetp(L: *mut lua_State, idx: c_int, p: *const c_void) -> c_int {
lua_rawgetp_old(L, idx, p);
lua_type(L, -1)
}
/// Emulates `lua_getuservalue` with `lua_getfenv` on the value at `idx`; returns the
/// type of the pushed value.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_getuservalue(L: *mut lua_State, idx: c_int) -> c_int {
lua_getfenv(L, idx);
lua_type(L, -1)
}
/// Lua 5.2 variant: delegates to `lua_getuservalue_old` and returns the type of the
/// pushed value.
#[cfg(feature = "lua52")]
pub unsafe fn lua_getuservalue(L: *mut lua_State, idx: c_int) -> c_int {
lua_getuservalue_old(L, idx);
lua_type(L, -1)
}
/// Performs `t[n] = v` (through `lua_settable`), where `t` is the value at `idx` and `v`
/// is the value on top of the stack.
pub unsafe fn lua_seti(L: *mut lua_State, mut idx: c_int, n: lua_Integer) {
luaL_checkstack(L, 1, cstr!("not enough stack slots available"));
idx = lua_absindex(L, idx);
lua_pushinteger(L, n);
lua_insert(L, -2); // move the key below the value, as lua_settable expects
lua_settable(L, idx);
}
/// Raw assignment `t[p] = v`, where `t` is the table at `idx`, the key is the pointer
/// `p` as light userdata, and `v` is the value on top of the stack.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_rawsetp(L: *mut lua_State, idx: c_int, p: *const c_void) {
let abs_i = lua_absindex(L, idx);
luaL_checkstack(L, 1, cstr!("not enough stack slots"));
lua_pushlightuserdata(L, p as *mut c_void);
lua_insert(L, -2); // move the key below the value, as lua_rawset expects
lua_rawset(L, abs_i);
}
/// Emulates `lua_setuservalue` with `lua_setfenv`; the value on top of the stack is
/// first checked to be a table.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_setuservalue(L: *mut lua_State, idx: c_int) {
luaL_checktype(L, -1, lua::LUA_TTABLE);
lua_setfenv(L, idx);
}
/// Four-argument `lua_dump` signature on top of the older three-argument function.
///
/// `_strip` is accepted for signature compatibility and ignored.
pub unsafe fn lua_dump(
L: *mut lua_State,
writer: lua_Writer,
data: *mut c_void,
_strip: c_int,
) -> c_int {
lua_dump_old(L, writer, data)
}
/// Three-argument `lua_resume` signature on top of the older two-argument function.
///
/// `_from` is accepted for signature compatibility and ignored.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_resume(L: *mut lua_State, _from: *mut lua_State, narg: c_int) -> c_int {
lua_resume_old(L, narg)
}
/// Pushes the length of the value at `idx`, honouring the `__len` metamethod for tables
/// and userdata. Raises a Lua error for values that have no length.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn lua_len(L: *mut lua_State, idx: c_int) {
match lua_type(L, idx) {
lua::LUA_TSTRING => {
lua_pushnumber(L, lua_objlen(L, idx) as lua_Number);
}
lua::LUA_TTABLE => {
// Prefer `__len`; fall back to the raw length when there is no metamethod.
if luaL_callmeta(L, idx, cstr!("__len")) == 0 {
lua_pushnumber(L, lua_objlen(L, idx) as lua_Number);
}
}
// Userdata is accepted only when `__len` exists; otherwise the guard fails and the
// error arm below is taken.
lua::LUA_TUSERDATA if luaL_callmeta(L, idx, cstr!("__len")) != 0 => {}
_ => {
luaL_error(
L,
cstr!("attempt to get length of a %s value"),
lua_typename(L, lua_type(L, idx)),
);
}
}
}
/// Parses the C string `s` as a floating-point number.
///
/// On success the number is pushed onto the stack and the string's size including its
/// NUL terminator is returned. If `s` is not valid UTF-8, or its trimmed contents do
/// not parse as `f64`, nothing is pushed and 0 is returned.
pub unsafe fn lua_stringtonumber(L: *mut lua_State, s: *const c_char) -> usize {
    let cs = CStr::from_ptr(s);
    let parsed = cs
        .to_str()
        .ok()
        .and_then(|text| text.trim().parse::<f64>().ok());
    match parsed {
        Some(n) => {
            lua_pushnumber(L, n as lua_Number);
            cs.to_bytes_with_nul().len()
        }
        None => 0,
    }
}
/// Emulates `lua_getextraspace`: returns a pointer to a `LUA_EXTRASPACE`-byte block
/// associated with the current thread.
///
/// Blocks are userdata stored in a registry table named `"__compat53_extraspace"` whose
/// metatable sets `__mode = "k"`, keyed by thread. The main thread's block is zeroed on
/// creation and additionally stored under the key `true`; a block created for any other
/// thread is initialised as a copy of that main block when it exists, else zeroed.
/// The stack is left balanced.
pub unsafe fn lua_getextraspace(L: *mut lua_State) -> *mut c_void {
use super::glue::LUA_EXTRASPACE;
luaL_checkstack(L, 4, cstr!("not enough stack slots available"));
lua_pushliteral(L, "__compat53_extraspace");
lua_pushvalue(L, -1);
lua_rawget(L, lua::LUA_REGISTRYINDEX);
if lua_istable(L, -1) == 0 {
// First use: create the per-thread table and register it.
lua_pop(L, 1);
lua_createtable(L, 0, 2);
lua_createtable(L, 0, 1);
lua_pushliteral(L, "k");
lua_setfield(L, -2, cstr!("__mode"));
lua_setmetatable(L, -2);
lua_pushvalue(L, -2);
lua_pushvalue(L, -2);
lua_rawset(L, lua::LUA_REGISTRYINDEX);
}
lua_replace(L, -2);
// Look up the block belonging to the running thread.
let is_main = lua_pushthread(L);
lua_rawget(L, -2);
let mut _ptr = lua_touserdata(L, -1);
if _ptr.is_null() {
lua_pop(L, 1);
_ptr = lua_newuserdata(L, LUA_EXTRASPACE as usize);
if is_main != 0 {
// mem::size_of::<c_void>() == 1
ptr::write_bytes(_ptr, 0, LUA_EXTRASPACE as usize);
lua_pushthread(L);
lua_pushvalue(L, -2);
lua_rawset(L, -4);
// Also store the main thread's block under the key `true`.
lua_pushboolean(L, 1);
lua_pushvalue(L, -2);
lua_rawset(L, -4);
} else {
// Non-main thread: seed the new block from the entry stored under `true`, if any.
lua_pushboolean(L, 1);
lua_rawget(L, -3);
let mptr = lua_touserdata(L, -1);
if !mptr.is_null() {
ptr::copy_nonoverlapping(mptr, _ptr, LUA_EXTRASPACE as usize)
} else {
ptr::write_bytes(_ptr, 0, LUA_EXTRASPACE as usize);
}
lua_pop(L, 1);
lua_pushthread(L);
lua_pushvalue(L, -2);
lua_rawset(L, -4);
}
}
lua_pop(L, 2);
return _ptr;
}
/// Pushes the globals table (the value at pseudo-index `LUA_GLOBALSINDEX`).
#[cfg(any(feature = "lua51", feature = "luajit"))]
#[inline(always)]
pub unsafe fn lua_pushglobaltable(L: *mut lua_State) {
lua_pushvalue(L, lua::LUA_GLOBALSINDEX);
}
//
// lauxlib ported functions
//
/// Ensures room for `sz + LUA_MINSTACK` more stack slots, raising a Lua error on failure.
///
/// The error reads `stack overflow (<msg>)` when `msg` is non-null and plain
/// `stack overflow` otherwise.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn luaL_checkstack(L: *mut lua_State, sz: c_int, msg: *const c_char) {
if lua_checkstack(L, sz + lua::LUA_MINSTACK) == 0 {
if !msg.is_null() {
luaL_error(L, cstr!("stack overflow (%s)"), msg);
} else {
lua_pushliteral(L, "stack overflow");
lua_error(L);
}
}
}
/// No-op stand-in for `luaL_checkversion`; performs no check.
pub unsafe fn luaL_checkversion(_L: *mut lua_State) {
// Void
}
/// Looks up metafield `e` of the object at `obj`.
///
/// Returns the type of the value on top of the stack when the underlying call reports
/// success, and `LUA_TNIL` otherwise.
pub unsafe fn luaL_getmetafield(L: *mut lua_State, obj: c_int, e: *const c_char) -> c_int {
if luaL_getmetafield_old(L, obj, e) != 0 {
lua_type(L, -1)
} else {
lua::LUA_TNIL
}
}
/// Calls `luaL_newmetatable_old` for `tname` and, when that reports a newly created
/// table (non-zero), also sets the table's `__name` field to `tname`.
///
/// Returns 1 in that case and 0 otherwise.
pub unsafe fn luaL_newmetatable(L: *mut lua_State, tname: *const c_char) -> c_int {
if luaL_newmetatable_old(L, tname) != 0 {
lua_pushstring(L, tname);
lua_setfield(L, -2, cstr!("__name"));
1
} else {
0
}
}
/// Loads a chunk from `buff` after verifying that its kind is permitted by `mode`.
///
/// A non-empty buffer whose first byte equals `LUA_SIGNATURE[0]` is treated as
/// "binary"; everything else as "text". When `mode` rejects the kind, the status from
/// `compat53_checkmode` (`LUA_ERRSYNTAX`) is returned; otherwise the result of
/// `luaL_loadbuffer` is returned.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn luaL_loadbufferx(
    L: *mut lua_State,
    buff: *const c_char,
    sz: usize,
    name: *const c_char,
    mode: *const c_char,
) -> c_int {
    // `buff` is only dereferenced when the buffer is non-empty.
    let is_binary = sz > 0 && *buff as u8 == lua::LUA_SIGNATURE[0];
    let modename = if is_binary {
        cstr!("binary")
    } else {
        cstr!("text")
    };
    let status = compat53_checkmode(L, mode, modename, lua::LUA_ERRSYNTAX);
    if status == lua::LUA_OK {
        luaL_loadbuffer(L, buff, sz, name)
    } else {
        status
    }
}
/// Returns the length of the value at `idx` as an integer, using `lua_len`.
///
/// Raises a Lua error when the computed length is not an integer.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn luaL_len(L: *mut lua_State, idx: c_int) -> lua_Integer {
let mut isnum = 0;
luaL_checkstack(L, 1, cstr!("not enough stack slots"));
lua_len(L, idx);
let res = lua_tointegerx(L, -1, &mut isnum);
lua::lua_pop(L, 1);
if isnum == 0 {
luaL_error(L, cstr!("object length is not an integer"));
}
res
}
/// Pushes onto `L` a traceback string describing the call stack of `L1`.
///
/// The string starts with `msg` followed by a newline (when `msg` is non-null), then
/// `stack traceback:`, then one `\n\t<source>:<line>: in <function>` entry per stack
/// level, starting at `level`. When the stack is deeper than
/// `COMPAT53_LEVELS1 + COMPAT53_LEVELS2` levels, the middle is replaced by `...`.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn luaL_traceback(
    L: *mut lua_State,
    L1: *mut lua_State,
    msg: *const c_char,
    mut level: c_int,
) {
    let mut ar: lua_Debug = std::mem::zeroed();
    let top = lua_gettop(L);
    let numlevels = compat53_countlevels(L1);
    let mark = if numlevels > COMPAT53_LEVELS1 + COMPAT53_LEVELS2 {
        COMPAT53_LEVELS1
    } else {
        0
    };
    if !msg.is_null() {
        lua_pushfstring(L, cstr!("%s\n"), msg);
    }
    lua_pushliteral(L, "stack traceback:");
    while lua_getstack(L1, level, &mut ar) != 0 {
        level += 1;
        if level == mark {
            // too many levels?
            lua_pushliteral(L, "\n\t..."); // add a '...'
            level = numlevels - COMPAT53_LEVELS2; // and skip to last ones
        } else {
            lua_getinfo(L1, cstr!("Slnt"), &mut ar);
            // Print the chunk's short source name filled in by the "S" option above,
            // rather than a fixed placeholder string.
            lua_pushfstring(L, cstr!("\n\t%s:"), ar.short_src.as_ptr());
            if ar.currentline > 0 {
                lua_pushfstring(L, cstr!("%d:"), ar.currentline);
            }
            lua_pushliteral(L, " in ");
            compat53_pushfuncname(L, &mut ar);
            // Fold everything pushed so far into a single string to bound stack usage.
            lua_concat(L, lua_gettop(L) - top);
        }
    }
    lua_concat(L, lua_gettop(L) - top);
}
/// Converts the value at `idx` to a string, pushes it, and returns a pointer to it
/// (its length is written through `len` by `lua_tolstring`).
///
/// A `__tostring` metamethod is used when present and must return a string, otherwise a
/// Lua error is raised. Without one: nil and booleans become their literal names,
/// strings and numbers are copied, and other values are rendered as `<name>: <pointer>`,
/// where `<name>` is the `__name` metafield if it is a string and the type name otherwise.
pub unsafe fn luaL_tolstring(L: *mut lua_State, idx: c_int, len: *mut usize) -> *const c_char {
if luaL_callmeta(L, idx, cstr!("__tostring")) == 0 {
let t = lua_type(L, idx);
match t {
lua::LUA_TNIL => {
lua_pushliteral(L, "nil");
}
lua::LUA_TSTRING | lua::LUA_TNUMBER => {
lua_pushvalue(L, idx);
}
lua::LUA_TBOOLEAN => {
if lua_toboolean(L, idx) == 0 {
lua_pushliteral(L, "false");
} else {
lua_pushliteral(L, "true");
}
}
_ => {
let tt = luaL_getmetafield(L, idx, cstr!("__name"));
let name = if tt == lua::LUA_TSTRING {
lua_tostring(L, -1)
} else {
lua_typename(L, t)
};
lua_pushfstring(L, cstr!("%s: %p"), name, lua_topointer(L, idx));
if tt != lua::LUA_TNIL {
// Drop the `__name` value that sits below the formatted string.
lua_replace(L, -2);
}
}
};
} else if lua_isstring(L, -1) == 0 {
luaL_error(L, cstr!("'__tostring' must return a string"));
}
lua_tolstring(L, -1, len)
}
/// Sets the metatable of the value on top of the stack to the one fetched with
/// `luaL_getmetatable(L, tname)`.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn luaL_setmetatable(L: *mut lua_State, tname: *const c_char) {
luaL_checkstack(L, 1, cstr!("not enough stack slots"));
luaL_getmetatable(L, tname);
lua_setmetatable(L, -2);
}
/// Returns the userdata pointer at index `i` if its metatable is raw-equal to the one
/// fetched with `luaL_getmetatable(L, tname)`, and null otherwise (including when the
/// value is not a userdata or has no metatable). The stack is left unchanged.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn luaL_testudata(L: *mut lua_State, i: c_int, tname: *const c_char) -> *mut c_void {
let mut p = lua_touserdata(L, i);
luaL_checkstack(L, 2, cstr!("not enough stack slots"));
if p.is_null() || lua_getmetatable(L, i) == 0 {
return ptr::null_mut();
} else {
luaL_getmetatable(L, tname);
let res = lua_rawequal(L, -1, -2);
lua_pop(L, 2); // remove both metatables
if res == 0 {
p = ptr::null_mut();
}
}
return p;
}
/// Registers every function of the array `l` into the table that sits below `nup`
/// upvalues on the stack, then pops the upvalues.
///
/// `l` must point to an array terminated by an entry whose `name` is null. Each
/// function is created as a C closure sharing copies of the `nup` upvalues.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn luaL_setfuncs(L: *mut lua_State, mut l: *const luaL_Reg, nup: c_int) {
    luaL_checkstack(L, nup + 1, cstr!("too many upvalues"));
    while !(*l).name.is_null() {
        // fill the table with given functions
        lua_pushstring(L, (*l).name);
        for _ in 0..nup {
            // copy upvalues to the top
            lua_pushvalue(L, -(nup + 1));
        }
        lua_pushcclosure(L, (*l).func, nup); // closure with those upvalues
        lua_settable(L, -(nup + 3)); // table must be below the upvalues, the name and the closure
        // Advance only after the current entry has been registered, so the first entry
        // is not skipped and the null-named sentinel is never used.
        l = l.offset(1);
    }
    lua_pop(L, nup); // remove upvalues
}
/// Ensures that field `fname` of the table at `idx` is a table and pushes it.
///
/// Returns 1 when the field already held a table; otherwise creates a new table, stores
/// it in the field, leaves it on the stack and returns 0.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub unsafe fn luaL_getsubtable(L: *mut lua_State, idx: c_int, fname: *const c_char) -> c_int {
let abs_i = lua_absindex(L, idx);
luaL_checkstack(L, 3, cstr!("not enough stack slots"));
lua_pushstring(L, fname);
lua_gettable(L, abs_i);
if lua_istable(L, -1) != 0 {
return 1;
}
lua_pop(L, 1);
lua_newtable(L);
lua_pushstring(L, fname);
lua_pushvalue(L, -2); // copy of the new table, consumed by lua_settable
lua_settable(L, abs_i);
return 0;
}
/// Loads module `modname` through `openf` unless the registry's `_LOADED` table already
/// has an entry for it, and leaves the module value on the stack.
///
/// * `lua51` / `lua52`: `openf` is called with `modname` and one result, which is stored
///   in `_LOADED[modname]`; a non-zero `glb` also assigns it to the global `modname`.
/// * `luajit`: `openf` is called with no results and the module is re-read from
///   `_LOADED[modname]`; when `glb` is zero the global `modname` is set to nil.
pub unsafe fn luaL_requiref(
L: *mut lua_State,
modname: *const c_char,
openf: lua_CFunction,
glb: c_int,
) {
luaL_checkstack(L, 3, cstr!("not enough stack slots available"));
luaL_getsubtable(L, lua::LUA_REGISTRYINDEX, cstr!("_LOADED"));
if lua_getfield(L, -1, modname) == lua::LUA_TNIL {
lua_pop(L, 1);
lua_pushcfunction(L, openf);
lua_pushstring(L, modname);
#[cfg(any(feature = "lua52", feature = "lua51"))]
{
lua_call(L, 1, 1);
lua_pushvalue(L, -1);
lua_setfield(L, -3, modname);
}
#[cfg(feature = "luajit")]
{
lua_call(L, 1, 0);
lua_getfield(L, -1, modname);
}
}
if cfg!(any(feature = "lua52", feature = "lua51")) && glb != 0 {
lua_pushvalue(L, -1);
lua_setglobal(L, modname);
}
if cfg!(feature = "luajit") && glb == 0 {
lua_pushnil(L);
lua_setglobal(L, modname);
}
// Replace the `_LOADED` table with the module value, leaving only the module.
lua_replace(L, -2);
}

View File

@ -0,0 +1,289 @@
// The MIT License (MIT)
//
// Copyright (c) 2019-2021 A. Orlenko
// Copyright (c) 2014 J.C. Moyer
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <lauxlib.h>
#include <lua.h>
#include <lualib.h>
#ifndef LUA_EXTRASPACE
#define LUA_EXTRASPACE (sizeof(void*))
#endif
// Macros taken from https://gcc.gnu.org/onlinedocs/cpp/Stringification.html
#define xstr(s) str(s)
#define str(s) #s
// One entry of the generated Rust file. `type` is one of the TY_* tags below and
// selects both the meaningful union member and how write_item() renders the entry.
typedef struct rs_item {
int type;
const char *name;
union {
int int_val;
const char *str_val;
LUA_INTEGER lua_int_val;
};
} rs_item;
// Initializer macros, one per TY_* tag. Each expands to a designated initializer
// for an rs_item.
// TY_INT: `pub const NAME: c_int = val;`
#define TY_INT 0
#define RS_INT(name, val) \
{ TY_INT, name, .int_val = val }
#if LUA_VERSION_NUM >= 503
// TY_LUAINT: `pub const NAME: LUA_INTEGER = val;` (Lua 5.3+ only)
#define TY_LUAINT 1
#define RS_LUAINT(name, val) \
{ TY_LUAINT, name, .lua_int_val = val }
#endif
// TY_STR: `pub const NAME: &str = "val";`
#define TY_STR 2
#define RS_STR(name, val) \
{ TY_STR, name, .str_val = val }
// TY_TYPE: `pub type NAME = val;`
#define TY_TYPE 3
#define RS_TYPE(name, val) \
{ TY_TYPE, name, .str_val = val }
// TY_COMMENT: `/* val */`
#define TY_COMMENT 4
#define RS_COMMENT(val) \
{ TY_COMMENT, NULL, .str_val = val }
// TY_RAW: `val` written verbatim
#define TY_RAW 5
#define RS_RAW(val) \
{ TY_RAW, NULL, .str_val = val }
// Maps a byte width to the name of the Rust signed integer type of that size.
// Widths 4, 8 and 16 map to i32, i64 and i128; every other width yields "i16".
const char *rs_int_type(int width) {
  if (width == 4)
    return "i32";
  if (width == 8)
    return "i64";
  if (width == 16)
    return "i128";
  return "i16";
}
// Maps a byte width to the name of the Rust unsigned integer type of that size.
// Widths 4, 8 and 16 map to u32, u64 and u128; every other width yields "u16".
const char *rs_uint_type(int width) {
  if (width == 4)
    return "u32";
  if (width == 8)
    return "u64";
  if (width == 16)
    return "u128";
  return "u16";
}
// Appends `n` copies of `c` at `*str`, advancing `*str` and adding `n` to `*written`.
// `szstr` is the total buffer capacity and `*written` the number of bytes already
// used. Returns 0 without writing anything when fewer than `n` bytes remain, and 1
// on success.
int try_write(char **str, char c, size_t n, size_t *written, size_t szstr) {
  if (szstr - *written < n) {
    return 0;
  }
  // Parenthesised on purpose: `*written++` would advance the pointer instead of
  // the counter it points to, leaving the caller's count unchanged.
  for (; n; n--, (*written)++)
    *(*str)++ = c;
  return 1;
}
// Copies `in` to `out`, doubling every backslash so the result can be embedded in a
// Rust string literal. Copying stops early as soon as try_write() reports that the
// next character does not fit in the `szout`-byte buffer. A terminating NUL is
// appended (and counted) only when the running count leaves room for it.
// Returns the running byte count, including the NUL when one was written.
size_t escape(const char *in, char *out, size_t szout) {
  size_t written = 0;
  const char *src;
  for (src = in; *src != '\0'; src++) {
    // A backslash is emitted twice, any other character once.
    size_t copies = (*src == '\\') ? 2 : 1;
    if (!try_write(&out, *src, copies, &written, szout))
      break;
  }
  if (written + 1 <= szout) {
    *out = '\0';
    written++;
  }
  return written;
}
// Emits `pub const <name>: c_int = <value>;`. Returns fprintf's result.
int write_int_item(FILE *f, const char *name, int value) {
return fprintf(f, "pub const %s: c_int = %d;\n", name, value);
}
#if LUA_VERSION_NUM >= 503
// Emits `pub const <name>: LUA_INTEGER = <value>;`, formatting the value with
// LUA_INTEGER_FMT. Returns fprintf's result. Only compiled for Lua 5.3 and later.
int write_lua_int_item(FILE *f, const char *name, LUA_INTEGER value) {
return fprintf(f, "pub const %s: LUA_INTEGER = " LUA_INTEGER_FMT ";\n", name,
value);
}
#endif
// Emits `pub const <name>: &str = "<value>";` with backslashes in `value` doubled.
// Returns fprintf's result, or 0 when the temporary escape buffer cannot be
// allocated (0 is what write_item() callers already treat as failure).
int write_str_item(FILE *f, const char *name, const char *value) {
  size_t len = strlen(value);
  // Worst case every character is a backslash and doubles, plus the NUL terminator.
  size_t bufsz = len * 2 + 1;
  char *buf = malloc(bufsz);
  int ret;
  if (!buf)
    return 0;
  escape(value, buf, bufsz);
  ret = fprintf(f, "pub const %s: &str = \"%s\";\n", name, buf);
  free(buf);
  return ret;
}
// Emits `pub type <name> = <value>;`. Returns fprintf's result.
int write_type(FILE *f, const char *name, const char *value) {
return fprintf(f, "pub type %s = %s;\n", name, value);
}
// Emits `value` wrapped in a `/* ... */` block comment. Returns fprintf's result.
int write_comment(FILE *f, const char *value) {
return fprintf(f, "/* %s */\n", value);
}
// Writes `value` verbatim. Returns 1 on success and 0 when fputs reports an error.
int write_raw(FILE *f, const char *value) { return fputs(value, f) >= 0; }
// Dispatches on `c->type` to the matching write_* helper and returns its result.
// An unknown type tag yields 0.
int write_item(FILE *f, const rs_item *c) {
switch (c->type) {
case TY_INT:
return write_int_item(f, c->name, c->int_val);
#if LUA_VERSION_NUM >= 503
case TY_LUAINT:
return write_lua_int_item(f, c->name, c->lua_int_val);
#endif
case TY_STR:
return write_str_item(f, c->name, c->str_val);
case TY_TYPE:
return write_type(f, c->name, c->str_val);
case TY_COMMENT:
return write_comment(f, c->str_val);
case TY_RAW:
return write_raw(f, c->str_val);
default:
return 0;
}
}
// Writes `num` entries of `items` to `f` in order, stopping at the first failure.
// Returns 1 when every entry was written and 0 otherwise.
int write_items_(FILE *f, const rs_item items[], size_t num) {
  size_t i;
  for (i = 0; i < num; i++) {
    // The write_* helpers report failure either as 0 or as fprintf's negative
    // return value, so any non-positive result is an error.
    if (write_item(f, &items[i]) <= 0)
      return 0;
  }
  return 1;
}
#define write_items(f, cs) write_items_(f, cs, sizeof(cs) / sizeof(cs[0]))
// Generates the Rust glue file named by argv[1] from constants and types taken
// from the Lua headers this program was compiled against.
// Exits with EXIT_FAILURE on a missing argument, when the file cannot be opened,
// when writing fails, or when closing (and so flushing) the file fails.
int main(int argc, const char **argv) {
  if (argc <= 1) {
    printf("usage: %s <filename>\n", argv[0]);
    return EXIT_FAILURE;
  }
  const char *filename = argv[1];
  FILE *f = fopen(filename, "w");
  if (!f) {
    printf("could not open file: errno = %d\n", errno);
    return EXIT_FAILURE;
  }
  const rs_item glue_entries[] = {
      RS_COMMENT("this file was generated by glue.c; do not modify it by hand"),
      RS_RAW("use std::os::raw::*;\n"),
      // == luaconf.h ==========================================================
      RS_COMMENT("luaconf.h"),
      RS_INT("LUA_EXTRASPACE", LUA_EXTRASPACE),
      RS_INT("LUA_IDSIZE", LUA_IDSIZE),
      RS_TYPE("LUA_NUMBER",
              sizeof(LUA_NUMBER) > sizeof(float) ? "c_double" : "c_float"),
      RS_TYPE("LUA_INTEGER", rs_int_type(sizeof(LUA_INTEGER))),
#if LUA_VERSION_NUM >= 502
      RS_TYPE("LUA_UNSIGNED", rs_uint_type(sizeof(LUA_UNSIGNED))),
#else
      RS_TYPE("LUA_UNSIGNED", rs_uint_type(sizeof(size_t))),
#endif
      // == lua.h ==============================================================
      RS_COMMENT("lua.h"),
      RS_INT("LUA_VERSION_NUM", LUA_VERSION_NUM),
      RS_INT("LUA_REGISTRYINDEX", LUA_REGISTRYINDEX),
#if LUA_VERSION_NUM == 501
      RS_INT("LUA_ENVIRONINDEX", LUA_ENVIRONINDEX),
      RS_INT("LUA_GLOBALSINDEX", LUA_GLOBALSINDEX),
#endif
      // == lauxlib.h ==========================================================
      RS_COMMENT("lauxlib.h"),
#if LUA_VERSION_NUM >= 503
      RS_INT("LUAL_NUMSIZES", LUAL_NUMSIZES),
#endif
      // == lualib.h ===========================================================
      RS_COMMENT("lualib.h"),
      RS_STR("LUA_COLIBNAME", LUA_COLIBNAME),
      RS_STR("LUA_TABLIBNAME", LUA_TABLIBNAME),
      RS_STR("LUA_IOLIBNAME", LUA_IOLIBNAME),
      RS_STR("LUA_OSLIBNAME", LUA_OSLIBNAME),
      RS_STR("LUA_STRLIBNAME", LUA_STRLIBNAME),
#ifdef LUA_UTF8LIBNAME
      RS_STR("LUA_UTF8LIBNAME", LUA_UTF8LIBNAME),
#endif
#ifdef LUA_BITLIBNAME
      RS_STR("LUA_BITLIBNAME", LUA_BITLIBNAME),
#endif
      RS_STR("LUA_MATHLIBNAME", LUA_MATHLIBNAME),
      RS_STR("LUA_DBLIBNAME", LUA_DBLIBNAME),
      RS_STR("LUA_LOADLIBNAME", LUA_LOADLIBNAME),
#ifdef LUA_JITLIBNAME
      RS_STR("LUA_JITLIBNAME", LUA_JITLIBNAME),
#endif
#ifdef LUA_FFILIBNAME
      RS_STR("LUA_FFILIBNAME", LUA_FFILIBNAME),
#endif
  };
  if (!write_items(f, glue_entries)) {
    printf("%s: error generating %s; aborting\n", argv[0], filename);
    // Release the stream on the failure path as well.
    fclose(f);
    return EXIT_FAILURE;
  }
  // Buffered output is flushed by fclose, so a failure here means the generated
  // file may be incomplete and must not be reported as success.
  if (fclose(f) != 0) {
    printf("%s: error closing %s: errno = %d\n", argv[0], filename, errno);
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
}

View File

@ -0,0 +1,306 @@
// The MIT License (MIT)
//
// Copyright (c) 2019-2021 A. Orlenko
// Copyright (c) 2014 J.C. Moyer
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//! Contains definitions from `lauxlib.h`.
use std::os::raw::{c_char, c_int, c_long, c_void};
use std::ptr;
use super::lua::{self, lua_CFunction, lua_Integer, lua_Number, lua_State};
#[cfg(any(feature = "lua54", feature = "lua53"))]
pub use super::glue::LUAL_NUMSIZES;
#[cfg(any(feature = "lua52", feature = "lua51", feature = "luajit"))]
pub use super::compat53::{luaL_getmetafield, luaL_newmetatable, luaL_requiref, luaL_tolstring};
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub use super::compat53::{
luaL_checkstack, luaL_checkversion, luaL_getsubtable, luaL_len, luaL_loadbufferx,
luaL_setfuncs, luaL_setmetatable, luaL_testudata, luaL_traceback,
};
/// Extra error code for `luaL_load`; defined as one more than
/// `lua::LUA_ERRERR`.
pub const LUA_ERRFILE: c_int = lua::LUA_ERRERR + 1;
/// C-layout (`#[repr(C)]`) record pairing a name with a `lua_CFunction`.
///
/// Pointers to this type are taken by `luaL_setfuncs`, `luaL_newlib` and
/// `luaL_newlibtable` in this module.
#[repr(C)]
pub struct luaL_Reg {
/// Name associated with `func`, as a C `char` pointer.
/// NOTE(review): presumably a NUL-terminated string — confirm against `lauxlib.h`.
pub name: *const c_char,
/// The C function stored alongside `name`.
pub func: lua_CFunction,
}
/// Calls the version-specific `luaL_checkversion_` binding with
/// `lua::LUA_VERSION_NUM` converted to `lua_Number`.
///
/// With `lua54`/`lua53` the three-argument binding is used and
/// `LUAL_NUMSIZES` is passed as the size argument; with `lua52` the
/// two-argument binding is used.
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
#[inline(always)]
pub unsafe fn luaL_checkversion(L: *mut lua_State) {
    // Both bindings take the same version argument, so compute it once.
    let version = lua::LUA_VERSION_NUM as lua_Number;

    #[cfg(any(feature = "lua54", feature = "lua53"))]
    luaL_checkversion_(L, version, LUAL_NUMSIZES as usize);

    #[cfg(feature = "lua52")]
    luaL_checkversion_(L, version);
}
// Raw `extern "C"` declarations for `lauxlib.h` functions (argument checking,
// metatables, userdata, error reporting).
//
// Some declarations are gated on version features. For several of those names
// (e.g. `luaL_getmetafield`, `luaL_newmetatable`, `luaL_tolstring`,
// `luaL_checkstack`, `luaL_setmetatable`, `luaL_testudata`) the features not
// covered here get the name from the `pub use super::compat53::...` items near
// the top of this file instead.
extern "C" {
// `luaL_checkversion_` has a different signature per version: three arguments
// for lua54/lua53, two for lua52. The `luaL_checkversion` wrapper in this file
// selects the matching call.
#[cfg(any(feature = "lua54", feature = "lua53"))]
pub fn luaL_checkversion_(L: *mut lua_State, ver: lua_Number, sz: usize);
#[cfg(feature = "lua52")]
pub fn luaL_checkversion_(L: *mut lua_State, ver: lua_Number);
#[cfg(any(feature = "lua54", feature = "lua53"))]
pub fn luaL_getmetafield(L: *mut lua_State, obj: c_int, e: *const c_char) -> c_int;
// For lua52/lua51/luajit the C symbol `luaL_getmetafield` is bound under the
// Rust name `luaL_getmetafield_old`; the public `luaL_getmetafield` name comes
// from the compat53 re-export for those features.
#[cfg(any(feature = "lua52", feature = "lua51", feature = "luajit"))]
#[link_name = "luaL_getmetafield"]
pub fn luaL_getmetafield_old(L: *mut lua_State, obj: c_int, e: *const c_char) -> c_int;
pub fn luaL_callmeta(L: *mut lua_State, obj: c_int, e: *const c_char) -> c_int;
#[cfg(any(feature = "lua54", feature = "lua53"))]
pub fn luaL_tolstring(L: *mut lua_State, idx: c_int, len: *mut usize) -> *const c_char;
pub fn luaL_argerror(L: *mut lua_State, arg: c_int, l: *const c_char) -> c_int;
// The `l` out-parameter of the two string accessors below is passed as a null
// pointer by the `luaL_checkstring` / `luaL_optstring` wrappers in this file.
pub fn luaL_checklstring(L: *mut lua_State, arg: c_int, l: *mut usize) -> *const c_char;
pub fn luaL_optlstring(
L: *mut lua_State,
arg: c_int,
def: *const c_char,
l: *mut usize,
) -> *const c_char;
pub fn luaL_checknumber(L: *mut lua_State, arg: c_int) -> lua_Number;
pub fn luaL_optnumber(L: *mut lua_State, arg: c_int, def: lua_Number) -> lua_Number;
pub fn luaL_checkinteger(L: *mut lua_State, arg: c_int) -> lua_Integer;
pub fn luaL_optinteger(L: *mut lua_State, arg: c_int, def: lua_Integer) -> lua_Integer;
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
pub fn luaL_checkstack(L: *mut lua_State, sz: c_int, msg: *const c_char);
pub fn luaL_checktype(L: *mut lua_State, arg: c_int, t: c_int);
pub fn luaL_checkany(L: *mut lua_State, arg: c_int);
#[cfg(any(feature = "lua54", feature = "lua53"))]
pub fn luaL_newmetatable(L: *mut lua_State, tname: *const c_char) -> c_int;
// Same arrangement as `luaL_getmetafield_old`: the C symbol
// `luaL_newmetatable` is bound under an `_old` Rust name for
// lua52/lua51/luajit.
#[cfg(any(feature = "lua52", feature = "lua51", feature = "luajit"))]
#[link_name = "luaL_newmetatable"]
pub fn luaL_newmetatable_old(L: *mut lua_State, tname: *const c_char) -> c_int;
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
pub fn luaL_setmetatable(L: *mut lua_State, tname: *const c_char);
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
pub fn luaL_testudata(L: *mut lua_State, ud: c_int, tname: *const c_char) -> *mut c_void;
pub fn luaL_checkudata(L: *mut lua_State, ud: c_int, tname: *const c_char) -> *mut c_void;
pub fn luaL_where(L: *mut lua_State, lvl: c_int);
// Variadic; declared with return type `!`, i.e. as never returning to the
// caller.
pub fn luaL_error(L: *mut lua_State, fmt: *const c_char, ...) -> !;
// TODO: test this
pub fn luaL_checkoption(
L: *mut lua_State,
arg: c_int,
def: *const c_char,
lst: *const *const c_char,
) -> c_int;
// The two declarations below are only available with lua54/lua53/lua52; no
// replacement for the other features is visible in this file.
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
pub fn luaL_fileresult(L: *mut lua_State, stat: c_int, fname: *const c_char) -> c_int;
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
pub fn luaL_execresult(L: *mut lua_State, stat: c_int) -> c_int;
}
// Pre-defined reference values.
// NOTE(review): per the Lua reference manual these are the special values
// associated with `luaL_ref` / `luaL_unref` (declared just below) — `LUA_REFNIL`
// for a nil object and `LUA_NOREF` as a value distinct from any real
// reference; confirm against `lauxlib.h`.
pub const LUA_NOREF: c_int = -2;
pub const LUA_REFNIL: c_int = -1;
// Raw `extern "C"` declarations for the reference functions and the
// file-loading entry point.
extern "C" {
pub fn luaL_ref(L: *mut lua_State, t: c_int) -> c_int;
pub fn luaL_unref(L: *mut lua_State, t: c_int, r: c_int);
// lua54/lua53/lua52 bind `luaL_loadfilex` (which takes an extra `mode`
// argument); `luaL_loadfile` is then provided by an inline wrapper in this
// file that passes a null `mode`.
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
pub fn luaL_loadfilex(L: *mut lua_State, filename: *const c_char, mode: *const c_char)
-> c_int;
// lua51/luajit bind `luaL_loadfile` directly.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub fn luaL_loadfile(L: *mut lua_State, filename: *const c_char) -> c_int;
}
/// Forwards to `luaL_loadfilex`, passing a null pointer as the `mode`
/// argument, and returns its status code unchanged.
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
#[inline(always)]
pub unsafe fn luaL_loadfile(L: *mut lua_State, f: *const c_char) -> c_int {
    let no_mode: *const c_char = ptr::null();
    luaL_loadfilex(L, f, no_mode)
}
// Raw `extern "C"` declarations for buffer/string loading, state creation and
// assorted helpers.
//
// Declarations gated on lua54/lua53/lua52 here (`luaL_loadbufferx`,
// `luaL_len`, `luaL_setfuncs`, `luaL_getsubtable`, `luaL_traceback`) are
// re-exported from `compat53` for lua51/luajit by the `pub use` items near the
// top of this file.
extern "C" {
// lua54/lua53/lua52 bind `luaL_loadbufferx`; `luaL_loadbuffer` is then an
// inline wrapper in this file that passes a null `mode`.
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
pub fn luaL_loadbufferx(
L: *mut lua_State,
buff: *const c_char,
sz: usize,
name: *const c_char,
mode: *const c_char,
) -> c_int;
// lua51/luajit bind `luaL_loadbuffer` directly.
#[cfg(any(feature = "lua51", feature = "luajit"))]
pub fn luaL_loadbuffer(
L: *mut lua_State,
buff: *const c_char,
sz: usize,
name: *const c_char,
) -> c_int;
pub fn luaL_loadstring(L: *mut lua_State, s: *const c_char) -> c_int;
pub fn luaL_newstate() -> *mut lua_State;
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
pub fn luaL_len(L: *mut lua_State, idx: c_int) -> lua_Integer;
// TODO (lua54): luaL_addgsub
pub fn luaL_gsub(
L: *mut lua_State,
s: *const c_char,
p: *const c_char,
r: *const c_char,
) -> *const c_char;
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
pub fn luaL_setfuncs(L: *mut lua_State, l: *const luaL_Reg, nup: c_int);
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
pub fn luaL_getsubtable(L: *mut lua_State, idx: c_int, fname: *const c_char) -> c_int;
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
pub fn luaL_traceback(L: *mut lua_State, L1: *mut lua_State, msg: *const c_char, level: c_int);
// Skip Lua 5.2 implementation in favor of the compat53 one
// (lua52 gets `luaL_requiref` from the `pub use super::compat53::...` item at
// the top of this file, so only lua54/lua53 bind the C symbol here).
#[cfg(any(feature = "lua54", feature = "lua53"))]
pub fn luaL_requiref(
L: *mut lua_State,
modname: *const c_char,
openf: lua_CFunction,
glb: c_int,
);
}
/// Pushes a new table by calling `lua::lua_createtable` with both size hints
/// set to 0.
///
/// The registration array `l` is currently ignored (hence the
/// `unused_variables` allowance).
#[inline(always)]
#[allow(unused_variables)]
pub unsafe fn luaL_newlibtable(L: *mut lua_State, l: *const luaL_Reg) {
    // TODO: pass a real hint for the second size parameter. That requires
    // changing the type of `l`, because the C macro computes the hint as
    // sizeof(l)/sizeof(l[0]), which is not available from a raw pointer.
    let array_hint = 0;
    let record_hint = 0;
    lua::lua_createtable(L, array_hint, record_hint)
}
/// Runs `luaL_checkversion`, creates the library table via
/// `luaL_newlibtable`, and then registers the entries of `l` with
/// `luaL_setfuncs`, passing 0 as its `nup` argument.
#[inline(always)]
pub unsafe fn luaL_newlib(L: *mut lua_State, l: *const luaL_Reg) {
    luaL_checkversion(L);
    luaL_newlibtable(L, l);

    let nup = 0;
    luaL_setfuncs(L, l, nup)
}
/// Calls `luaL_argerror(L, arg, extramsg)` when `cond` is 0; does nothing for
/// any non-zero `cond`.
#[inline(always)]
pub unsafe fn luaL_argcheck(L: *mut lua_State, cond: c_int, arg: c_int, extramsg: *const c_char) {
    // C-style truthiness: any non-zero value means the check passed.
    if cond != 0 {
        return;
    }
    luaL_argerror(L, arg, extramsg);
}
/// Forwards to `luaL_checklstring` for argument `n`, passing a null pointer
/// for the length out-parameter, and returns the resulting C string pointer.
#[inline(always)]
pub unsafe fn luaL_checkstring(L: *mut lua_State, n: c_int) -> *const c_char {
    let no_len: *mut usize = ptr::null_mut();
    luaL_checklstring(L, n, no_len)
}
/// Forwards to `luaL_optlstring` for argument `n` with default `d`, passing a
/// null pointer for the length out-parameter.
#[inline(always)]
pub unsafe fn luaL_optstring(L: *mut lua_State, n: c_int, d: *const c_char) -> *const c_char {
    let no_len: *mut usize = ptr::null_mut();
    luaL_optlstring(L, n, d, no_len)
}
// From 5.3 user manual:
// Macros to project non-default integer types (luaL_checkint, luaL_optint,
// luaL_checklong, luaL_optlong) were deprecated. Use their equivalent over
// lua_Integer with a type cast (or, when possible, use lua_Integer in your
// code).
#[inline(always)]
//#[deprecated]
pub unsafe fn luaL_checkint(L: *mut lua_State, n: c_int) -> c_int {
luaL_checkinteger(L, n) as c_int
}
/// Reads optional argument `n` with `luaL_optinteger`, using `d` (converted to
/// `lua_Integer`) as the default, and converts the result to `c_int` using an
/// `as` cast.
#[inline(always)]
//#[deprecated]
pub unsafe fn luaL_optint(L: *mut lua_State, n: c_int, d: c_int) -> c_int {
    let fallback = d as lua_Integer;
    let wide: lua_Integer = luaL_optinteger(L, n, fallback);
    wide as c_int
}
#[inline(always)]
//#[deprecated]
pub unsafe fn luaL_checklong(L: *mut lua_State, n: c_int) -> c_long {
luaL_checkinteger(L, n) as c_long
}
/// Reads optional argument `n` with `luaL_optinteger`, using `d` (converted to
/// `lua_Integer`) as the default, and converts the result to `c_long` using an
/// `as` cast.
#[inline(always)]
//#[deprecated]
pub unsafe fn luaL_optlong(L: *mut lua_State, n: c_int, d: c_long) -> c_long {
    let fallback = d as lua_Integer;
    let wide: lua_Integer = luaL_optinteger(L, n, fallback);
    wide as c_long
}
/// Looks up the type of the value at index `i` with `lua::lua_type` and
/// returns the name that `lua::lua_typename` reports for that type.
#[inline(always)]
pub unsafe fn luaL_typename(L: *mut lua_State, i: c_int) -> *const c_char {
    let type_tag = lua::lua_type(L, i);
    lua::lua_typename(L, type_tag)
}
/// Loads `filename` with `luaL_loadfile` and, if that returns 0, runs the
/// loaded chunk via `lua::lua_pcall` with no arguments, `LUA_MULTRET` results
/// and no message handler (0).
///
/// Returns the non-zero load status if loading failed, otherwise the status
/// returned by `lua_pcall`.
#[inline(always)]
pub unsafe fn luaL_dofile(L: *mut lua_State, filename: *const c_char) -> c_int {
    match luaL_loadfile(L, filename) {
        0 => lua::lua_pcall(L, 0, lua::LUA_MULTRET, 0),
        load_status => load_status,
    }
}
/// Loads the C string `s` with `luaL_loadstring` and, if that returns 0, runs
/// the loaded chunk via `lua::lua_pcall` with no arguments, `LUA_MULTRET`
/// results and no message handler (0).
///
/// Returns the non-zero load status if loading failed, otherwise the status
/// returned by `lua_pcall`.
#[inline(always)]
pub unsafe fn luaL_dostring(L: *mut lua_State, s: *const c_char) -> c_int {
    match luaL_loadstring(L, s) {
        0 => lua::lua_pcall(L, 0, lua::LUA_MULTRET, 0),
        load_status => load_status,
    }
}
/// Fetches the field named `n` from the table at `lua::LUA_REGISTRYINDEX` via
/// `lua::lua_getfield`; any value returned by that call is discarded.
#[inline(always)]
pub unsafe fn luaL_getmetatable(L: *mut lua_State, n: *const c_char) {
    let registry = lua::LUA_REGISTRYINDEX;
    lua::lua_getfield(L, registry, n);
}
// luaL_opt would be implemented here but it is undocumented, so it's omitted
/// Forwards to `luaL_loadbufferx` with the buffer `s` of `sz` bytes and chunk
/// name `n`, passing a null pointer as the `mode` argument, and returns its
/// status code unchanged.
#[cfg(any(feature = "lua54", feature = "lua53", feature = "lua52"))]
#[inline(always)]
pub unsafe fn luaL_loadbuffer(
    L: *mut lua_State,
    s: *const c_char,
    sz: usize,
    n: *const c_char,
) -> c_int {
    let no_mode: *const c_char = ptr::null();
    luaL_loadbufferx(L, s, sz, n, no_mode)
}
// TODO: Add buffer API
// omitted: old module system compatibility (removed in 5.4)

Some files were not shown because too many files have changed in this diff Show More