Text to Speech Fun


The possibilities for working with sound in ActionScript have been expanded with the relatively new SampleData event: you can now record sound from the microphone into a ByteArray or play sound dynamically from a ByteArray. This opens up the potential for all kinds of cool things, and I came across one example of this from Kelvin Luck, a class that plays back sounds at different speeds.

Also cool is the Google Translate text to speech webservice, which can be used to retrieve an mp3 file from a text query. Pete Shand circumvents the 100-character limit with his TextToSpeech class.

Combining these two, I made a cool little application (just type in some text and press Hear):
Vodpod videos no longer available.

 


(If the swf doesn’t load, try here.)

While there are still a few little glitches, the potential is enormous. Something like this could be used for creating dynamic stories in games, where the text a user enters is actually spoken by a character, or….

There are still a few glitches in the code, but I’ll post it below. Thanks to Kelvin Luck and Pete Shand for the basis for the two sound classes – I only slightly changed them. If anyone would like to expand the code to include stuff like time-stretching and pitch-bending (also possible in ActionScript), that would be great too.

Update: I just saw that nerdook has used the Google text-to-speech service in his most recent game, I Am An Insane Rogue AI, although not combined with adjusting its speed.

<?xml version="1.0" encoding="utf-8"?>
<!--
	Demo UI for the speed-adjustable text-to-speech player: a text field, a
	speed slider (0.3x to 3x) and a "Hear" button that speaks the typed text.
-->
<s:Application xmlns:fx="http://ns.adobe.com/mxml/2009"
               xmlns:s="library://ns.adobe.com/flex/spark"
               xmlns:mx="library://ns.adobe.com/flex/mx">

	<fx:Script>
		<![CDATA[
			import flash.events.Event;

			/**
			 * Click handler for the "Hear" button: builds a TextToSpeech for the
			 * current text and slider value, and starts playback once the
			 * instance reports that loading is complete.
			 */
			private function hearClicked():void
			{
				var speech:TextToSpeech = new TextToSpeech(text.text, speed.value);
				speech.addEventListener(Event.COMPLETE, speechLoaded);
			}

			/** Plays the TextToSpeech instance that dispatched the COMPLETE event. */
			private function speechLoaded(e:Event):void
			{
				(e.target as TextToSpeech).play();
			}
		]]>
	</fx:Script>

	<s:layout>
		<s:VerticalLayout horizontalAlign="center" verticalAlign="middle" />
	</s:layout>

	<s:Panel title="Altered TextToSpeech" width="95%" height="95%">
		<s:layout>
			<s:VerticalLayout horizontalAlign="center" paddingTop="10" />
		</s:layout>

		<mx:Spacer explicitMinHeight="15"/>

		<!-- Text to be spoken -->
		<s:TextInput id="text"/>
		<!-- Playback speed multiplier passed to TextToSpeech -->
		<mx:HSlider id="speed" minimum=".3" maximum="3" value="1" />
		<s:Button label="Hear" click="hearClicked()" />

	</s:Panel>

</s:Application>
package  
{
	import flash.events.Event;
	import flash.events.EventDispatcher;
	import flash.display.Sprite;
	import flash.net.URLRequest;
	import flash.media.Sound;
	import flash.media.SoundChannel;
	
	/**
	 * Speaks arbitrary text using the Google Translate text-to-speech service.
	 *
	 * The text is split on spaces into chunks (the service limits the query
	 * length), each chunk is downloaded as an MP3 and wrapped in an MP3Player
	 * so it can be played at an adjustable speed. Clips are played back to
	 * back, each one starting slightly before the previous one ends.
	 *
	 * Events dispatched:
	 *   "FirstClipLoaded" - when the first clip has finished loading.
	 *   Event.COMPLETE    - when every clip has finished loading.
	 */
	public class TextToSpeech extends EventDispatcher
	{
		/** Maximum accumulated chunk length used when grouping words into one request. */
		private static const MAX_QUERY_LENGTH:int = 90;
		
		/** Language code sent to the service as the "tl" parameter. */
		public var Lang			:String = "en";
		
		private var str			:String = "";
		private var wordIndex	:int = 0;
		private var words		:Vector.<String>;
		private var queries		:Vector.<String>;
		// Off-stage sprite used only as a source of ENTER_FRAME ticks.
		private var sp			:Sprite = new Sprite();
		private var loadIndex	:int = 0;
		private var sounds		:Vector.<MP3Player>;
		// Not used by this class; kept so the member layout is unchanged.
		private var channel		:SoundChannel = new SoundChannel();
		// The Sound currently being loaded (or the last one loaded).
		private var s:Sound;
		
		private var speakIndex:int = 0;
		private var speed:Number;
		
		/** Overlap between consecutive clips, in milliseconds at speed 1. */
		private var Blend:int = 400;
		
		/**
		 * @param _str  Text to speak. When empty or null nothing is loaded
		 *              until load() is called.
		 * @param speed Playback speed multiplier handed to every MP3Player.
		 */
		public function TextToSpeech(_str:String="",speed:Number=1) 
		{
			this.speed = speed;
			// A null string is treated like an empty one instead of crashing in load().
			if (_str) {
				load(_str);
			}
		}
		
		/**
		 * Restarts playback from the first loaded clip. Does nothing when no
		 * clip has been loaded yet. Loading that is still in progress is left
		 * untouched, so this may be called from a "FirstClipLoaded" listener.
		 */
		public function play():void
		{
			stopPlayback();
			speakIndex = 0;
			if (sounds && sounds.length > 0) Speak();
		}
		
		/**
		 * Stops playback and detaches from the clip that is currently being
		 * downloaded, so no further clips are queued. Safe to call at any time,
		 * including before anything was loaded.
		 */
		public function stop():void
		{
			stopPlayback();
			if (s) s.removeEventListener(Event.COMPLETE, doLoadComplete);
		}
		
		/**
		 * Discards any previous text and starts loading the clips for _str.
		 *
		 * @param _str Text to speak; words are separated by single spaces.
		 */
		public function load(_str:String):void
		{
			// Cancel a previous load/playback so a stale COMPLETE event cannot
			// push a clip into the new list.
			stop();
			
			str = _str;
			words = Vector.<String>(str.split(' '));
			
			queries = new Vector.<String>();
			sounds = new Vector.<MP3Player>();
			
			wordIndex = 0;
			loadIndex = 0;
			speakIndex = 0;
			
			query(wordIndex);
			loadSpeech(loadIndex);
		}
		
		/**
		 * Groups the words from startIndex onwards into request-sized chunks and
		 * appends them to queries. Every word ends up in exactly one chunk; a
		 * word that is longer than the limit gets a chunk of its own.
		 */
		private function query(startIndex:int):void
		{
			var i:int = startIndex;
			var val:String;
			while (i < words.length) {
				// Always take at least one word so the loop is guaranteed to advance.
				val = words[i++];
				while (i < words.length && val.length + words[i].length < MAX_QUERY_LENGTH) {
					val += " " + words[i++];
				}
				queries.push(val);
			}
			wordIndex = i;
		}
		
		/** Starts downloading the clip for queries[index]. */
		private function loadSpeech(index:int):void
		{
			// The text is user input: escape it so spaces, '&', '#', etc. cannot
			// break or truncate the query string.
			var url:String = 'http://translate.google.com/translate_tts?tl=' + encodeURIComponent(Lang) + '&q=' + encodeURIComponent(queries[index]);
			s = new Sound(new URLRequest(url));
			s.addEventListener(Event.COMPLETE, doLoadComplete);
		}
		
		/** Wraps the freshly loaded Sound in an MP3Player and loads the next chunk, if any. */
		private function doLoadComplete(event:Event):void
		{
			s.removeEventListener(Event.COMPLETE, doLoadComplete);
			
			var m:MP3Player = new MP3Player();
			m.playbackSpeed = speed;
			m.loadSound(s);
			sounds.push(m);
			
			if (loadIndex == 0) dispatchEvent(new Event("FirstClipLoaded"));
			
			loadIndex++;
			if (loadIndex < queries.length) loadSpeech(loadIndex);
			else dispatchEvent(new Event(Event.COMPLETE));
		}
		
		/** Stops every clip and the per-frame end-of-clip polling; loading is not affected. */
		private function stopPlayback():void
		{
			sp.removeEventListener(Event.ENTER_FRAME, WaitForEnd);
			if (sounds) {
				// During a blend two clips can be audible at once, so stop them all.
				for each (var player:MP3Player in sounds) {
					player.stop();
				}
			}
		}
		
		/** Starts the clip at speakIndex and begins polling for its end. */
		private function Speak():void
		{
			sounds[speakIndex].play();
			sp.addEventListener(Event.ENTER_FRAME, WaitForEnd);
		}
		
		/**
		 * Per-frame poll. Starts the next clip Blend/speed milliseconds before
		 * the current one ends; the last available clip is allowed to play to
		 * its end before playback is shut down.
		 */
		private function WaitForEnd(event:Event):void
		{
			if (!sounds || speakIndex >= sounds.length) {
				sp.removeEventListener(Event.ENTER_FRAME, WaitForEnd);
				return;
			}
			
			var current:MP3Player = sounds[speakIndex];
			var pos:Number = current.position;
			var duration:Number = current.length / speed;
			// MP3Player reports -1 once its channel is gone, i.e. the clip ended.
			var finished:Boolean = pos < 0 || pos > duration;
			
			if (speakIndex + 1 < sounds.length) {
				if (finished || pos > duration - Blend / speed) {
					sp.removeEventListener(Event.ENTER_FRAME, WaitForEnd);
					// Otherwise the clip keeps playing underneath the next one (the blend).
					if (pos > duration) current.stop();
					speakIndex++;
					Speak();
				}
			}
			else if (finished) {
				stopPlayback();
			}
		}
	}
}
package  
{
	import flash.events.Event;
	import flash.events.SampleDataEvent;
	import flash.media.Sound;
	import flash.media.SoundChannel;
	import flash.net.URLRequest;
	import flash.utils.ByteArray;		

	/**
	 * Plays an MP3 at an adjustable speed. The decoded samples of the source
	 * Sound are pulled with Sound.extract() and fed, resampled by skipping or
	 * repeating samples, into a second Sound through its SAMPLE_DATA event.
	 *
	 * @author Kelvin Luck
	 */
	public class MP3Player 
	{
		
		/** Number of stereo sample frames generated per SAMPLE_DATA callback (despite the name). */
		public static const BYTES_PER_CALLBACK:int = 4096; // Should be >= 2048 && <= 8192
		
		/** Size in bytes of one stereo frame: two 32-bit floats. */
		private static const FRAME_BYTES:int = 8;

		private var _playbackSpeed:Number = 1;	

		/** Current speed multiplier (1 = normal speed). */
		public function get playbackSpeed():Number
		{
			return _playbackSpeed;
		}

		/**
		 * @param value Speed multiplier; must be greater than zero.
		 * @throws Error when value is zero, negative or NaN.
		 */
		public function set playbackSpeed(value:Number):void
		{
			// Written as a negated comparison so NaN is rejected as well.
			if (!(value > 0)) {
				throw new Error('Playback speed must be positive!');
			}
			_playbackSpeed = value;
		}

		private var _mp3:Sound;
		private var _dynamicSound:Sound;
		private var _channel:SoundChannel;

		// Read position in the source, in sample frames; fractional between callbacks.
		private var _phase:Number;
		// Source length in sample frames (extract() output is 44.1 kHz).
		private var _numSamples:int;

		public function MP3Player()
		{
		}

		/** Loads the MP3 at the given request and starts playing once it has loaded. */
		public function loadAndPlay(request:URLRequest):void
		{
			_mp3 = new Sound();
			_mp3.addEventListener(Event.COMPLETE, mp3Complete);
			_mp3.load(request);
		}
		
		/** Assigns an already loaded Sound as the source without starting playback. */
		public function loadSound(s:Sound):void
		{
			_mp3 = s;
		}

		/** Assigns an already loaded Sound as the source and plays it immediately. */
		public function playLoadedSound(s:Sound):void
		{
			_mp3 = s;
			play();
		}
		
		/** Stops playback and releases the generated sound and its channel. Safe to call repeatedly. */
		public function stop():void
		{
			if (_dynamicSound) {
				_dynamicSound.removeEventListener(SampleDataEvent.SAMPLE_DATA, onSampleData);
				_dynamicSound = null;
			}
			if (_channel) {
				_channel.removeEventListener(Event.SOUND_COMPLETE, onSoundFinished);
				// Silence audio that is already buffered instead of letting it drain.
				_channel.stop();
				_channel = null;
			}
		}

		private function mp3Complete(event:Event):void
		{
			_mp3.removeEventListener(Event.COMPLETE, mp3Complete);
			play();
		}

		/**
		 * (Re)starts playback of the assigned source from its beginning.
		 *
		 * @throws Error when no source sound has been assigned yet.
		 */
		public function play():void
		{
			if (!_mp3) {
				throw new Error('No sound has been loaded!');
			}
			stop();
			_dynamicSound = new Sound();
			_dynamicSound.addEventListener(SampleDataEvent.SAMPLE_DATA, onSampleData);
			
			// Sound.length is in milliseconds; 44.1 sample frames per millisecond.
			_numSamples = int(_mp3.length * 44.1);
			
			_phase = 0;
			_channel = _dynamicSound.play();
			_channel.addEventListener(Event.SOUND_COMPLETE, onSoundFinished);
		}
		
		private function onSoundFinished(event:Event):void
		{
			stop();
		}

		/**
		 * Supplies the next batch of output frames. For every output frame the
		 * source frame at int(_phase) is copied and _phase advances by the
		 * playback speed. Writing fewer frames than the runtime's minimum (or
		 * none) lets the generated sound run out and complete.
		 */
		private function onSampleData( event:SampleDataEvent ):void
		{
			var l:Number;
			var r:Number;
			var p:int;
			
			var loadedSamples:ByteArray = new ByteArray();
			var startPosition:int = int(_phase);
			_mp3.extract(loadedSamples, BYTES_PER_CALLBACK * _playbackSpeed, startPosition);
			
			var maxBytes:int = BYTES_PER_CALLBACK * FRAME_BYTES;
			
			while (event.data.length < maxBytes) {
				
				// Byte offset of the wanted source frame inside the extracted chunk.
				p = int(_phase - startPosition) * FRAME_BYTES;
				
				// Ran past the extracted data: leave _phase where it is so the
				// next callback resumes exactly at this frame.
				if (p + FRAME_BYTES > loadedSamples.length) {
					break;
				}
				
				loadedSamples.position = p;
				l = loadedSamples.readFloat();
				r = loadedSamples.readFloat();
				
				event.data.writeFloat(l);
				event.data.writeFloat(r);
				
				_phase += _playbackSpeed;
				
				// End of the source reached.
				if (_phase >= _numSamples) {
					break;
				}
			}
		}
		
		/** Playback position of the generated sound in milliseconds, or -1 when not playing. */
		public function get position():Number
		{
			return _channel?_channel.position: -1;
		}
		
		/** Length of the source sound in milliseconds (at normal speed), or -1 when none is assigned. */
		public function get length():Number
		{	
			return _mp3?_mp3.length: -1;	
		}
	}
}
Advertisements

One response to “Text to Speech Fun”

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s