The chrome.tts API is used to convert text into audible speech generated by the text-to-speech system. It takes advantage of the TTS capabilities already built into the operating system.
The official Chrome Extensions documentation on TTS is here: https://developer.chrome.com/docs/extensions/reference/api/tts
How to declare the permission in manifest.json
{
...
"permissions":[
"tts"
],
...
}
How to generate Speech from text
Run a simple command in an extension page, side panel page or background script of your extension. Like many other extension APIs, it does not work in content scripts.
// Speak a single utterance; no options object is passed here.
chrome.tts.speak('Hello, world.');
Customization to speech
There are quite a number of options that chrome.tts provides for generating different types of speech. The official documentation provides a comprehensive list of options that can be applied: https://developer.chrome.com/docs/extensions/reference/api/tts#type-TtsOptions
// Speak an utterance with every available option spelled out.
chrome.tts.speak('Hello, world.', {
  // All of these options are optional.
  volume: 0.5, // 0.0 - 1.0
  rate: 2.0, // 0.1 - 10.0
  pitch: 1.0, // 0.0 - 2.0
  lang: 'en-US', // in the form language-region. Examples: 'en', 'en-US', 'en-GB', 'zh-CN'.
  // true: queue this utterance behind any speech already in progress.
  // false (the default): interrupt the current speech and flush the queue
  // before speaking this utterance.
  enqueue: true,
  // If empty, uses any available voice.
  voiceName: "",
  // Event types the voice should preferably support (desired) or must
  // support (required).
  // https://developer.chrome.com/docs/extensions/reference/api/tts#type-EventType
  // Useful together with the onEvent handler below.
  desiredEventTypes: [],
  requiredEventTypes: [],
  // Event handler: receives status updates for this utterance.
  onEvent: (ttsEvent) => {
    // Index of the current character in the utterance.
    const charIndex = ttsEvent.charIndex;
    // Description of the error; relevant when the event type is 'error'.
    const errorMessage = ttsEvent.errorMessage;
    // The length of the next part of the utterance.
    const length = ttsEvent.length;
    // Event type, e.g. 'start', 'end', 'word', 'sentence', 'marker',
    // 'interrupted', 'cancelled', 'error', 'pause', 'resume'.
    const type = ttsEvent.type;
  }
});
Get Voices
// Log the name, language, providing extension and supported event types
// of every voice reported by the TTS system.
chrome.tts.getVoices((voices) => {
  for (const [index, voice] of voices.entries()) {
    console.log(`Voice ${index}:`);
    console.log(` name: ${voice.voiceName}`);
    console.log(` lang: ${voice.lang}`);
    console.log(` extension id: ${voice.extensionId}`);
    console.log(` event types: ${voice.eventTypes}`);
  }
});
Control Speech
Here are the different functions you can call from the chrome.tts API to control the system speech.
// Ask whether the system is currently speaking.
chrome.tts.isSpeaking((speaking) => {
  if (!speaking) {
    return;
  }
  // Todo Code
});

// Pause the speech in progress.
chrome.tts.pause();

// Resume paused speech.
chrome.tts.resume();

// Stop the speech.
chrome.tts.stop();
Sample Project
We are going to make a Chrome extension that reads out the text a user submits. In addition, we will use the chrome.contextMenus API so that a user can right-click on selected text and have the system read it out. If you are not familiar with the chrome.contextMenus API, you can read our article on it: Understand Chrome Extensions Context Menus.
Alright, we will need 4 files for this project: background.js, manifest.json, popup.html and popup.js.
You can find the source to this project here: https://github.com/BuildChromeExtensions/textToSpeech
manifest.json
{
"name": "Text to Speech",
"version": "1.0.0.0",
"manifest_version": 3,
"permissions": [
"tts",
"contextMenus"
],
"action": {
"default_popup": "popup.html"
},
"background": {
"service_worker": "background.js"
}
}
popup.html
<!DOCTYPE html>
<!-- Popup UI: a form with the text and speech settings, plus playback controls. -->
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Document</title>
  <style>
    body {
      width: 300px;
      height: 400px;
      background: linear-gradient(#a2a3a2, #FFAA44);
    }
    form {
      justify-content: center;
      display: flex;
      flex-direction: column;
      padding: 10px 20px;
      color: white;
    }
    label {
      display: flex;
      width: 100%;
      margin: 4px 0px;
    }
    span {
      margin: 0px 10px;
    }
    input {
      width: 100%;
      padding: 4px 8px;
      border-radius: 30px;
    }
    button {
      background: #764d00;
      color: #ffffff;
      padding: 10px 20px;
      border: none;
      cursor: pointer;
      box-shadow: rgba(100, 100, 111, 0.2) 0px 7px 29px 0px;
      transition: all 0.4s;
      border-radius: 30px;
      font-weight: 700;
      margin:4px;
    }
  </style>
</head>
<body>
  <!-- Field names match what popup.js reads from the submitted form. -->
  <form>
    <h1>Text to Speech</h1>
    <input required placeholder="Text to Read" name="text" maxlength="200" />
    <!-- Slider bounds mirror the chrome.tts option ranges:
         volume 0.0 - 1.0, pitch 0.0 - 2.0, rate 0.1 - 10.0. -->
    <label><span>Volume</span><input name="volume" type="range" step="0.05" value="0.5" min="0" max="1" /></label>
    <label><span>Pitch</span><input name="pitch" type="range" step="0.05" value="1" min="0" max="2" /></label>
    <label><span>Rate</span><input name="rate" type="range" step="0.05" value="1" min="0.1" max="10" /></label>
    <label><span>Enqueue</span><input name="enqueue" type="checkbox" /></label>
    <!-- Options are added by popup.js once the voices are loaded. -->
    <select name="voiceName"></select>
    <button type="submit">Speak</button>
  </form>
  <!-- Playback controls sit outside the form so clicking them never submits it. -->
  <button id="resume">Resume</button>
  <button id="pause">Pause</button>
  <button id="stop">Stop</button>
  <script type="text/javascript" src="/popup.js"></script>
</body>
</html>
popup.js
// Fill the voice <select> with one <option> per available voice on load.
chrome.tts.getVoices((voices) => {
  const select = document.querySelector('select');
  for (const { voiceName, lang } of voices) {
    const option = document.createElement('option');
    // Visible label is "name (lang)"; a voice may not report a language,
    // in which case only the name is shown.
    option.textContent = lang ? `${voiceName} (${lang})` : voiceName;
    // The value is passed to chrome.tts.speak as voiceName on submit.
    option.value = voiceName;
    select.appendChild(option);
  }
});
// Speak the submitted text using the settings chosen in the popup form.
document.querySelector('form').onsubmit = (e) => {
  // Handle the submit in script instead of letting the browser navigate.
  e.preventDefault();
  const form = e.target;
  const text = form.text.value;
  const volume = Number.parseFloat(form.volume.value);
  const pitch = Number.parseFloat(form.pitch.value);
  const rate = Number.parseFloat(form.rate.value);
  const enqueue = form.enqueue.checked;
  const voiceName = form.voiceName.value;
  chrome.tts.speak(
    text,
    {
      // All of these options are optional.
      volume, // 0.0 - 1.0
      rate, // 0.1 - 10.0
      pitch, // 0.0 - 2.0
      lang: 'en-US', // in the form language-region. Examples: 'en', 'en-US', 'en-GB', 'zh-CN'.
      // true: queue this utterance behind any speech already in progress.
      // false: interrupt the current speech and flush the queue first.
      enqueue,
      // If empty, uses any available voice.
      voiceName,
      // Event handler: log every status update for this utterance.
      onEvent: (ttsEvent) => {
        console.log(ttsEvent);
      },
    },
    () => {
      // Surface failures (e.g. an out-of-range option) instead of
      // dropping them silently.
      if (chrome.runtime.lastError) {
        console.error(`TTS error: ${chrome.runtime.lastError.message}`);
      }
    },
  );
};
// Attach a click handler to each playback button, looked up by element id.
for (const [buttonId, action] of [
  ['pause', () => { chrome.tts.pause(); }],
  ['resume', () => { chrome.tts.resume(); }],
  ['stop', () => { chrome.tts.stop(); }],
]) {
  document.getElementById(buttonId).onclick = action;
}
background.js
// When the extension is installed, create the "Read" context-menu item,
// registered for the "selection" context.
chrome.runtime.onInstalled.addListener(function registerReadMenu() {
  const readMenuItem = {
    id: "speak",
    title: "Read",
    contexts: ["selection"],
  };
  chrome.contextMenus.create(readMenuItem);
});
// Read the selected text aloud when the "speak" context-menu item is clicked.
chrome.contextMenus.onClicked.addListener((info, tab) => {
  // Ignore clicks on any other menu item.
  if (info.menuItemId !== 'speak') {
    return;
  }
  // get text
  const text = info.selectionText;
  // Nothing to read if the click carried no selected text.
  if (!text) {
    return;
  }
  // read out text
  chrome.tts.speak(text);
});
If you’re interested, you can implement your own text-to-speech engine using the ttsEngine API. The documentation for that is here: https://developer.chrome.com/docs/extensions/reference/api/ttsEngine
Leave a Reply