2018-03-24 12:18:37 +00:00
#!/usr/bin/env perl
# AGI script that uses Google's translate text to speech engine.
# Copyright (C) 2011 - 2015, Lefteris Zafiris <>
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the COPYING file
# at the top of the source tree.
# -----
# Usage
# -----
# agi(googletts.agi,"text",[language],[intkey],[speed]): This will invoke the Google TTS
# engine, render the text string to speech and play it back to the user.
# If 'intkey' is set the script will wait for user input. Any given interrupt keys will
# cause the playback to immediately terminate and the dialplan to proceed to the
# matching extension (this is mainly for use in IVR, see README for examples).
# If 'speed' is set the speech rate is altered by that factor (defaults to 1).
# The script contacts Google's TTS service in order to get the voice data,
# which it then stores in a local cache (by default /tmp/) for future use.
# Parameters like default language, sample rate, caching and cache dir
# can be set up by altering the following variables:
# Default language: $lang
# Sample rate: $samplerate
# Speed factor: $speed
# Cache: $usecache
# Cache directory: $cachedir
# SoX Version: $sox_ver
use warnings;
use strict;
use utf8;
use Encode qw(decode encode);
use File::Temp qw(tempfile);
use File::Copy qw(move);
use File::Path qw(mkpath);
use Digest::MD5 qw(md5_hex);
use URI::Escape;
use LWP::UserAgent;
use LWP::ConnCache;
# Turn on autoflush for the selected output handle so that commands printed
# to Asterisk are not held back by output buffering.
$| = 1;

# ----------------------------- #
#   User defined parameters:    #
# ----------------------------- #
my $lang       = "en";      # Default language
my $speed      = 1;         # Output speed factor
my $usecache   = 1;         # Use of cache mechanism (true = enabled)
my $cachedir   = "/tmp";    # Cache directory path
my $samplerate = "";        # Output audio sample rate, leave blank to auto-detect
my $sox_ver    = "";        # SoX version, leave blank to auto-detect
my $debug      = 0;         # Verbose debugging messages
# ----------------------------- #

# Script-wide state shared between the main flow and the subroutines.
my (%AGI, @text);               # AGI arguments/environment and the split text chunks
my ($fh, $tmpname, $fexten);    # temp file handle, temp file name, audio file extension
my $intkey  = "";               # interrupt keys handed to STREAM FILE
my $tmpdir  = "/tmp";           # directory in which temp files are created
my $maxlen  = 4096;             # upper bound used by the cache path length check
my $timeout = 10;               # NOTE(review): presumably the HTTP timeout in seconds - confirm
# Base URL of the TTS service; requests are built as "$url/translate_tts?...".
# NOTE(review): the value is empty in this copy and has to be filled in for
# requests to reach the service - confirm the intended host.
my $url     = "";
# Locations of the external tools as printed by which(1). The values still
# carry the trailing newline here and are empty when a tool is not found.
my $sox     = `/usr/bin/which sox`;
my $mpg123  = `/usr/bin/which mpg123`;
# Store AGI input #
# Positional arguments handed over by the dialplan, in the order given in the
# usage header: text, language, interrupt keys, speed.
($AGI{arg_1}, $AGI{arg_2}, $AGI{arg_3}, $AGI{arg_4}) = @ARGV;

# Read the "agi_<key>: <value>" lines sent on STDIN and stop at the first
# empty line. The line ending must be removed before testing the length:
# an unchomped blank line is "\n" (length 1), so without chomp the
# terminator would never be recognised and the loop would run until EOF.
while (my $envline = <STDIN>) {
    chomp $envline;
    last if (!length($envline));
    $AGI{$1} = $2 if ($envline =~ /^agi_(\w+)\:\s+(.*)$/);
}

# Prefix used in front of every log message. Falls back to an empty request
# name when no "agi_request" line was received, to avoid an undef warning.
my $name = " -- " . (defined $AGI{request} ? $AGI{request} : "") . ":";
# Abort if required programs not found. #
fatal_log("sox or mpg123 is missing. Aborting.") if (!$sox || !$mpg123);
# Strip the newline left by the backtick output of which(1).
chomp($sox, $mpg123);

# Sanitising input #
# A missing text argument is decoded as an empty string so that it ends up in
# the "No text passed" error below instead of raising uninitialized warnings.
$AGI{arg_1} = decode('utf8', defined $AGI{arg_1} ? $AGI{arg_1} : "");
for ($AGI{arg_1}) {
    # Replace metacharacters and control characters with spaces, then
    # collapse whitespace runs into a single space.
    s/[\\|*~<>^\(\)\[\]\{\}[:cntrl:]]/ /g;
    s/\s+/ /g;
    # Trim the ends so that input consisting only of blanks or stripped
    # characters is detected as empty by the check that follows.
    s/^\s|\s$//g;
    fatal_log("No text passed for synthesis.") if (!length);
    # Split input to comply with google tts requirements: chunks of at most
    # 150 characters, cut at the end of the text, at punctuation or at
    # whitespace.
    @text = /.{1,150}$|.{1,150}[.,?!:;]|.{1,150}\s/g;
}
# Number of chunks to synthesise.
my $lines = @text;
# Setting language, interrupt keys and speed rate #
# All three values are later interpolated into the request URL, the AGI
# command line or the sox arguments, so the patterns are anchored with \A/\z:
# a plain "$" would also accept a value with a trailing newline.
if (length($AGI{arg_2})) {
    if ($AGI{arg_2} =~ /\A[a-zA-Z]{2}(-[a-zA-Z]{2,6})?\z/) {
        $lang = $AGI{arg_2};
    } else {
        console_log("Invalid language setting. Using default.");
    }
}

# Interrupt keys: the keyword "any" selects every DTMF key, otherwise only a
# string made of DTMF characters is accepted.
if (length($AGI{arg_3})) {
    $intkey = "0123456789#*" if ($AGI{arg_3} eq "any");
    $intkey = $AGI{arg_3} if ($AGI{arg_3} =~ /\A[0-9*#]+\z/);
}

# Speed factor: must be a positive decimal number. Zero is rejected because
# get_sox_args() divides by the speed for older sox versions.
if (length($AGI{arg_4})) {
    if ($AGI{arg_4} =~ /\A\d+(\.\d+)?\z/ && $AGI{arg_4} > 0) {
        $speed = $AGI{arg_4};
    }
}
# Check cache path size: dir length + md5 + file extension #
# When the projected path is too long the cache is switched off; otherwise
# the cache directory is created if it does not exist yet.
if ($usecache) {
    my $path_size = length($cachedir) + 32 + 6;
    if ($path_size >= $maxlen) {
        console_log("Cache path size exceeds limit. Disabling cache.");
        $usecache = 0;
    } elsif (!-d "$cachedir") {
        mkpath("$cachedir");
    }
}
# Answer channel if not already answered #
# checkresponse() only reads a reply from STDIN, so the channel state has to
# be requested first; without a command there is no reply to read.
print "CHANNEL STATUS\n";
my @result = checkresponse();
# State 4 is reported by Asterisk while the line is still ringing, i.e. the
# call has not been answered yet.
if ($result[0] == 4) {
    print "ANSWER\n";
    @result = checkresponse();
    if ($result[0] != 0) {
        fatal_log("Failed to answer channel.");
    }
}
# Setting filename extension according to sample rate. #
# With no sample rate configured the format is detected from the channel.
# Otherwise the configured rate is looked up among the supported ones, and
# anything else falls back to plain "sln" at 8000 Hz.
if (!$samplerate) {
    ($fexten, $samplerate) = detect_format();
} else {
    my @supported = (
        [12000, "sln12"],
        [16000, "sln16"],
        [32000, "sln32"],
        [44100, "sln44"],
        [48000, "sln48"],
    );
    my ($match) = grep { $samplerate == $_->[0] } @supported;
    if ($match) {
        $fexten = $match->[1];
    } else {
        ($fexten, $samplerate) = ("sln", 8000);
    }
}
# Initialise User agent #
# Certificate host names are verified and a browser-like agent string is sent.
my $ua = LWP::UserAgent->new(ssl_opts => { verify_hostname => 1 });
$ua->agent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0");
# Reuse the connection between the per-chunk requests and apply the
# configured timeout, so a stalled request cannot hold the call for as long
# as LWP's much larger default.
$ua->conn_cache(LWP::ConnCache->new());
$ua->timeout($timeout);
# Main loop: synthesise and play every text chunk in order. A chunk is served
# from the cache when possible, otherwise it is fetched as mp3, converted to
# raw audio with mpg123 and sox, played back and (optionally) cached.
# Playback returning a negative value aborts the script; a positive value
# (a key was pressed) stops processing the remaining chunks.
for my $i (0 .. $lines - 1) {
    my $filename;
    my $res;
    my $len  = length($text[$i]);
    my $line = encode('utf8', $text[$i]);
    $line =~ s/^\s+|\s+$//g;
    next if (length($line) == 0);
    if ($debug) {
        console_log("Text passed for synthesis: $line",
            "Language: $lang, Interrupt keys: $intkey, Sample rate: $samplerate",
            "Speed: $speed, Caching: $usecache, Cache dir: $cachedir"
        );
    }
    if ($usecache) {
        # The cache key covers everything that changes the rendered audio.
        $filename = md5_hex("$line.$lang.$speed");
        # Stream file from cache if it exists #
        if (-r "$cachedir/$filename.$fexten") {
            console_log("File already in cache.") if ($debug);
            $res = playback("$cachedir/$filename", $intkey);
            die if ($res < 0);
            last if ($res > 0);
            # Chunk served from cache: skip the download, otherwise the same
            # text would be fetched and played a second time.
            next;
        }
    }

    # Handle interrupts #
    $SIG{'INT'} = \&int_handler;
    $SIG{'HUP'} = \&int_handler;

    ($fh, $tmpname) = tempfile("ggl_XXXXXXXX", DIR => $tmpdir, UNLINK => 1);
    # The token is computed from the raw (unescaped) text.
    my $token = make_token($line);
    $line = uri_escape($line);
    my $req = "$url/translate_tts?ie=UTF-8&q=$line&tl=$lang&total=$lines&idx=$i&textlen=$len&client=tw-ob&tk=$token&prev=input";
    console_log("URL passed: $req") if ($debug);

    my $ua_request = HTTP::Request->new('GET' => $req);
    # NOTE(review): the Referer value is empty in this copy - confirm whether
    # a referring URL is expected here.
    $ua_request->header(
        'Accept'          => '*/*',
        'Accept-Encoding' => 'identity;q=1, *;q=0',
        'Accept-Language' => 'en-US,en;q=0.5',
        'DNT'             => '1',
        'Range'           => 'bytes=0-',
        'Referer'         => '',
    );
    # The response body is written straight into the temp file.
    my $ua_response = $ua->request($ua_request, $tmpname);
    # Report the status as one message; console_log() emits every argument
    # as a separate log entry.
    fatal_log("Failed to fetch file: " . $ua_response->status_line)
        unless ($ua_response->is_success);

    # Convert mp3 file to 16bit 8Khz or 16kHz mono raw #
    system($mpg123, "-q", "-w", "$tmpname.wav", $tmpname) == 0
        or fatal_log("$mpg123 failed: $?");

    # Detect sox version #
    # A sox binary that accepts --version is treated as version 14,
    # anything else as version 12.
    if (!$sox_ver) {
        $sox_ver = (system("$sox --version > /dev/null 2>&1") == 0) ? 14 : 12;
        console_log("Found sox version $sox_ver in: $sox, mpg123 in: $mpg123") if ($debug);
    }
    my @soxargs = get_sox_args("$tmpname.wav", "$tmpname.$fexten");
    system(@soxargs) == 0 or fatal_log("$sox failed: $?");
    unlink "$tmpname.wav";

    # Playback and save file in cache #
    $res = playback($tmpname, $intkey);
    die if ($res < 0);
    if ($usecache) {
        console_log("Saving file $filename to cache") if ($debug);
        # A failed move only costs the cache entry; report it on STDERR and
        # remove the leftover temp file instead of aborting the call.
        move("$tmpname.$fexten", "$cachedir/$filename.$fexten") or do {
            warn "$name Failed to save $filename to cache: $!\n";
            unlink "$tmpname.$fexten";
        };
    } else {
        unlink "$tmpname.$fexten";
    }
    last if ($res > 0);
}
# Read one command reply from STDIN and parse it.
# Returns a two element list: (result code, trailing data) for a
# "200 result=<n> <data>" reply, or (-1, -1) when the reply is not of that
# form or when STDIN has reached end of file.
sub checkresponse {
    my $input = <STDIN>;
    my @values;

    # End of input yields undef; report it as a failed command rather than
    # chomping and matching an undefined value.
    if (!defined $input) {
        warn "$name No response received (end of input).\n";
        return (-1, -1);
    }

    chomp $input;
    if ($input =~ /^200 result=(-?\d+)\s?(.*)$/) {
        @values = ("$1", "$2");
    } else {
        # A "520-Invalid" reply continues on a following line; append it
        # (when there is one) so the whole message is logged.
        if ($input =~ /^520-Invalid/) {
            my $more = <STDIN>;
            $input .= $more if (defined $more);
        }
        warn "$name Unexpected result: $input\n";
        @values = (-1, -1);
    }
    return @values;
}
# Stream a sound file to the channel.
#   $file - path of the file, without extension
#   $keys - keys that may interrupt the playback (may be empty)
# When the playback is interrupted by a key, the dialplan is pointed at the
# extension named after that key, priority 1.
# Returns the result code of STREAM FILE: the character code of the pressed
# key, or -1 when the command failed.
sub playback {
    my ($file, $keys) = @_;

    print "STREAM FILE $file \"$keys\"\n";
    my @response = checkresponse();
    if ($response[0] >= 32 && chr($response[0]) =~ /[\w*#]/) {
        my $digit = chr($response[0]);
        # Interpolate the character itself; a chr() call written inside the
        # string would be logged literally.
        console_log("Got digit $digit") if ($debug);
        # Every command is answered with a reply line; read each one so no
        # unread reply is left behind for a later checkresponse() call.
        print "SET EXTENSION $digit\n";
        checkresponse();
        print "SET PRIORITY 1\n";
        checkresponse();
    } elsif ($response[0] == -1) {
        console_log("Failed to play $file.");
    }
    return $response[0];
}
# Detect the sound format used by the channel.
# Asks for the CHANNEL(audionativeformat) variable and maps the codec name
# found in the reply to a two element list (file extension, sample rate).
# Anything unrecognised, including a failed query, maps to ("sln", 8000).
sub detect_format {
    my @format;

    print "GET FULL VARIABLE \${CHANNEL(audionativeformat)}\n";
    my @reply  = checkresponse();
    my $native = $reply[1];

    # The 12 kHz pattern accepts both "slin12" and "sln12": the other
    # branches all match on the "slin" codec name, and with "sln" alone a
    # slin12 channel would fall through to the 8 kHz default.
    if    ($native =~ /(silk|sli?n)12/)                    { @format = ("sln12", 12000); }
    elsif ($native =~ /(speex|slin|silk)16|g722|siren7/)   { @format = ("sln16", 16000); }
    elsif ($native =~ /(speex|slin|celt)32|siren14/)       { @format = ("sln32", 32000); }
    elsif ($native =~ /(celt|slin)44/)                     { @format = ("sln44", 44100); }
    elsif ($native =~ /(celt|slin)48/)                     { @format = ("sln48", 48000); }
    else                                                   { @format = ("sln",   8000);  }
    return @format;
}
# Build the sox command line that converts $source_file into raw audio at
# the selected sample rate, written to $dest_file.
# When the speed factor differs from 1 a speed effect is appended: "tempo"
# for sox version 14 and later, "stretch" for older versions.
# Returns the command as a list suitable for system().
sub get_sox_args {
    my ($source_file, $dest_file) = @_;

    my @command = ($sox, $source_file, "-q", "-r", $samplerate, "-t", "raw", $dest_file);
    return @command if ($speed == 1);

    my @effect = ($sox_ver >= 14)
        ? ("tempo", "-s", $speed)
        : ("stretch", 1 / $speed, "80");
    push(@command, @effect);
    return @command;
}
# Compute the "tk" request token for a piece of text.
# The routine mirrors obfuscated code taken from Google: the current time in
# whole hours is used as seed, every value unpacked from the text is mixed in
# with make_rl(), and the result is reduced to six digits.
# Returns a string of the form "<stamp>.<stamp xor seed>".
sub make_token {
    my $text = shift;
    my $seed = int(time() / 3600);
    my $mix  = $seed;

    for my $code (unpack('U*', $text)) {
        $mix = make_rl($mix + $code, '+-a^+6');
    }
    $mix = make_rl($mix, '+-3^+b+-f');
    # Fold a negative value into the unsigned 32 bit range.
    $mix = ($mix & 2147483647) + 2147483648 if ($mix < 0);
    $mix %= 10**6;
    return ($mix . '.' . ($mix ^ $seed));
}
# Apply a sequence of shift/combine steps to a number.
#   $num - starting value
#   $str - step description, read three characters at a time:
#            1st: '+' adds the shifted value (masked to 32 bits),
#                 anything else xors it
#            2nd: '+' shifts right, anything else shifts left
#            3rd: shift amount, a decimal digit or a letter from 'a'
#                 upwards (a = 10, b = 11, ...)
# Trailing characters that do not form a complete triple are ignored.
# Returns the resulting number.
sub make_rl {
    my ($num, $str) = @_;

    my $last_start = length($str) - 3;
    for (my $pos = 0; $pos <= $last_start; $pos += 3) {
        my ($combine, $direction, $digit) = split(//, substr($str, $pos, 3));

        my $amount  = (ord($digit) >= ord('a')) ? ord($digit) - 87 : int($digit);
        my $shifted = ($direction eq '+') ? $num >> $amount : $num << $amount;

        $num = ($combine eq '+')
            ? ($num + $shifted) & 4294967295
            : $num ^ $shifted;
    }
    return $num;
}
# Log messages both to STDERR and to Asterisk.
# Every argument is emitted as a separate entry, prefixed with $name: once
# through warn and once as a NOOP command.
sub console_log {
    foreach my $message (@_) {
        warn "$name $message\n";
        print "NOOP \"$name $message\"\n";
        # Read the reply to NOOP. Left unread, it would be picked up by the
        # next checkresponse() call and taken for the reply to a different
        # command.
        checkresponse();
    }
}
# Log the given messages through console_log() and terminate the script.
# Callers use it for unrecoverable errors, so it must not return.
sub fatal_log {
    console_log(@_);
    # The messages have already been reported; a bare die matches the way
    # the main loop aborts after a failed playback.
    die;
}
# Signal handler installed for INT and HUP: terminate the script with a
# message. The trailing newline keeps the file/line suffix off the message.
sub int_handler {
    my $notice = "$name Interrupt signal received, terminating...\n";
    die $notice;
}
# Remove the temp file and everything derived from it (the name plus any
# suffix). Placed in an END block so that it also runs when the script
# terminates through die, not only when it reaches its last statement.
END {
    if ($tmpname) {
        warn "$name Cleaning temp files.\n" if ($debug);
        unlink glob "$tmpname*";
    }
}