import Dictionary from "./dictionary.mjs";
let INBETWEEN_CHARS_REGEX = "[\\s\\.\\?!,]";
let PHRASE_END_CHARS_REGEX = "[\\.\\?!,]";
let SENTENCE_END_CHARS_REGEX = "[\\.\\?!]";
/**
* Constructs a Predictionary word prediction class. It's possible to manage multiple internal dictionaries, retrieve predictions
* (suggestions) for a given input and learn/refine the dictionaries from user input.
*
* @constructor
*/
function Predictionary() {
/**
* Default dictionary key, if no key is specified.
* @type {string}
*/
this.DEFAULT_DICTIONARY_KEY = 'DEFAULT_DICTIONARY_KEY';
let thiz = this;
let PREDICT_METHOD_COMPLETE_WORD = 'PREDICT_METHOD_COMPLETE_WORD';
let PREDICT_METHOD_NEXT_WORD = 'PREDICT_METHOD_NEXT_WORD';
let _dicts = {};
let _lastChosenWord = null;
/**
* Loads a single dictionary from a JSON string that was previously exported by {@link Predictionary#dictionaryToJSON}.
* If the given dictionaryKey already exists, the existing dictionary is replaced.
*
* @param {string} dictionaryJSON json string representing a dictionary, exported by {@link Predictionary#dictionaryToJSON}
* @param {string} [dictionaryKey={@link Predictionary#DEFAULT_DICTIONARY_KEY}] the key for which the dictionary should
* be imported.
*/
this.loadDictionary = function (dictionaryJSON, dictionaryKey) {
if (!dictionaryJSON) {
throw 'dictionaryJSON must be specified.';
}
dictionaryKey = dictionaryKey || thiz.DEFAULT_DICTIONARY_KEY;
let dictionary = new Dictionary();
dictionary.load(dictionaryJSON);
_dicts[dictionaryKey] = dictionary;
};
/**
* Loads all dictionaries from a JSON string that was previously exported by {@link Predictionary#dictionariesToJSON}.
* This method replaces/deletes all currently loaded dictionaries!
* @param {string} dictionariesJSON json string representing dictionaries, exported by {@link Predictionary#dictionariesToJSON}
*/
this.loadDictionaries = function (dictionariesJSON) {
if (!dictionariesJSON) {
throw 'dictionariesJSON must be specified.';
}
_dicts = {};
let list = JSON.parse(dictionariesJSON);
list.forEach(element => {
thiz.loadDictionary(element.json, element.key);
})
};
/**
* Exports a single dictionary to a JSON string. Subsequently the dictionary can be imported using the resulting string
* and {@link Predictionary#loadDictionary}.
*
* @param {string} [dictionaryKey={@link Predictionary#DEFAULT_DICTIONARY_KEY}] the key of the dictionary to export.
* @return {string} JSON string representing the currently loaded dictionary with specified dictionaryKey.
*/
this.dictionaryToJSON = function (dictionaryKey) {
dictionaryKey = dictionaryKey || thiz.DEFAULT_DICTIONARY_KEY;
let dict = _dicts[dictionaryKey];
return dict ? dict.toJSON() : null;
};
/**
* Exports all dictionaries to a JSON string. Subsequently the dictionaries can be imported using the resulting string
* and {@link Predictionary#loadDictionaries}.
*
* @return {string} JSON string representing all currently loaded dictionaries.
*/
this.dictionariesToJSON = function () {
let list = [];
Object.keys(_dicts).forEach(key => {
list.push({
key: key,
json: _dicts[key].toJSON()
})
});
return JSON.stringify(list);
};
/**
* Use only a singe loaded dictionary for predictions.
*
* @param {string} dictionaryKey the key of the dictionary to use
*/
this.useDictionary = function (dictionaryKey) {
if (!dictionaryKey) {
throw 'dictionaryKey must be specified.';
}
Object.keys(_dicts).forEach(key => {
_dicts[key].disabled = dictionaryKey !== key;
});
};
/**
* Use a set of dictionaries for predictions, specified by an array of dictionaryKeys.
*
* @param {Array} dictionaryKeys an array of strings, specifying the dictionaryKeys to use
*/
this.useDictionaries = function (dictionaryKeys) {
if (!(dictionaryKeys instanceof Array)) {
throw 'dictionaryKeys must be specified and of type Array.';
}
Object.keys(_dicts).forEach(key => {
_dicts[key].disabled = dictionaryKeys.indexOf(key) === -1 && key !== thiz.DEFAULT_DICTIONARY_KEY;
});
};
/**
* Use all currently loaded dictionaries for predictions.
*/
this.useAllDictionaries = function () {
Object.keys(_dicts).forEach(key => {
_dicts[key].disabled = false;
});
};
/**
* Add a new internal dictionary for predictions.
*
* @param {string} dictionaryKey the key of the dictionary to add
* @param {Array} [words] Optional array of words (string) that should be added to the new dictionary.
*/
this.addDictionary = function (dictionaryKey, words) {
if (!dictionaryKey) {
throw 'dictionaryKey must be specified.';
}
if (_dicts[dictionaryKey]) {
throw 'dictionary already existing.';
}
_dicts[dictionaryKey] = new Dictionary();
if (words && words instanceof Array) {
words.forEach(element => {
thiz.addWord(element, dictionaryKey);
});
}
};
/**
* Add a single new word/element to a dictionary.
*
* @param {string|Object} element the element to add, can be either a plain word as a string or an object containing
* the properties object.word (word to add as string) and object.rank (number specifying the rank of the word,
* a lower rank causes the word to be ranked to front as a suggestion).
* @param {string} [dictionaryKey={@link Predictionary#DEFAULT_DICTIONARY_KEY}] optional key of the dictionary to add the element.
*/
this.addWord = function (element, dictionaryKey) {
dictionaryKey = dictionaryKey || thiz.DEFAULT_DICTIONARY_KEY;
if (!element) {
throw 'element to add not specified.';
}
if (!_dicts[dictionaryKey]) {
thiz.addDictionary(dictionaryKey);
}
let dict = _dicts[dictionaryKey];
if (typeof element === 'string') {
dict.addWord(sanitize(element));
} else if (element.word && typeof element.word === 'string') {
dict.addWord(sanitize(element.word), element.rank);
}
};
/**
* Add multiple new words/elements to a dictionary.
*
* @param {string|Object} elements the elements to add, can be an Array of either plain words (string) or of objects containing
* the properties object.word (word to add as string) and object.rank (number specifying the rank of the word,
* a lower rank causes the word to be ranked to front as a suggestion).
* @param {string} [dictionaryKey={@link Predictionary#DEFAULT_DICTIONARY_KEY}] optional key of the dictionary to add the element.
*/
this.addWords = function (elements, dictionaryKey) {
if (!(elements instanceof Array)) {
throw 'elements to add must be instance of array specified.';
}
elements.forEach(element => {
thiz.addWord(element, dictionaryKey);
})
};
/**
* Deletes a single word from one or all dictionaries.
*
* @param {string} inputOrWord a single word or longer string where the last word will be deleted in the dictionaries.
* @param {Object} [options] Object for options
* @param {string} [options.dictionaryKey] the key of the dictionary where the word should be deleted. If not
* specified the word is deleted in all dictionaries.
* @param {string} [options.ignoreCase] if false or undefined (default) only words with matching cases are
* deleted, otherwise also words with non-matching case.
*/
this.delete = function (inputOrWord, options) {
let word = getLastWord(inputOrWord);
options = options || {};
if (word) {
if (!options.dictionaryKey) {
thiz.getDictionaryKeys().forEach(key => {
_dicts[key].deleteWord(word, options.ignoreCase);
});
} else if (_dicts[options.dictionaryKey]) {
_dicts[options.dictionaryKey].deleteWord(word, options.ignoreCase);
}
}
};
/**
* Import words from a plain string (e.g. text file).
*
* @param {string} importString a plain text string (e.g. from a text file)
* @param {Object} [options] options object containing additional properties. The default properties are suited for
* a plain text string in format: "word1;word2;word3;...", setting rankPosition=1 would be suited for
* a plain text in format: "word1 rank1;word2 rank2;word3 rank3;...".
* @param {string} [options.elementSeparator=;] separator to split the elements from the importString
* @param {string} [options.rankSeparator=<space>] separator to split a single element into word and rank
* @param {string} [options.wordPosition=0] position of the word in the element (0-based)
* @param {string} [options.rankPosition] position of the rank in the element (0-based)
* @param {string} [options.addToDictionary={@link Predictionary#DEFAULT_DICTIONARY_KEY}] key of the dictionary where
* the words should be added.
*/
this.parseWords = function (importString, options) {
options = options || {};
let elementSeparator = options.elementSeparator || ';';
let rankSeparator = options.rankSeparator || ' ';
let wordPosition = options.wordPosition || 0;
let wordPosition2 = options.wordPosition2;
let rankPosition = options.rankPosition;
let addToDictionary = options.addToDictionary || thiz.DEFAULT_DICTIONARY_KEY;
let lines = importString.split(elementSeparator);
lines.forEach(line => {
let elems = line.split(rankSeparator);
let rank = parseInt(elems[rankPosition]);
if (wordPosition !== undefined && wordPosition2 !== undefined) {
let word1 = elems[wordPosition];
let word2 = elems[wordPosition2];
if (word1 && word2) {
thiz.learn(word2, word1, addToDictionary);
}
} else if (elems[wordPosition]) {
let elementToAdd = {
word: elems[wordPosition].trim()
};
if (!isNaN(rank)) {
elementToAdd.rank = rank;
}
thiz.addWord(elementToAdd, addToDictionary);
}
});
};
/**
* Retrieves saved words of a single or all loaded dictionaries.
*
* @param {string} [dictionaryKey] key of the dictionary from which the words should be retrieved, if not set all
* dictionaries are used.
* @return {string[]} array of saved words (string) for the dictionary with the given key.
*/
this.getWords = function (dictionaryKey) {
let words = [];
if (!dictionaryKey) {
thiz.getDictionaryKeys().forEach(key => {
words = words.concat(_dicts[key].getWords())
});
} else if (_dicts[dictionaryKey]) {
words = _dicts[dictionaryKey].getWords();
}
return words;
};
/**
* Test if a given word exists a single or all loaded dictionaries.
*
* @param {string} word to test if existing
* @param {string} [dictionaryKey] key of the dictionary to check, if not set all dictionaries are used
* @param {boolean} [matchCase] if set (true) the word is searched case-sensitive, otherwise case-insensitive (default)
* @return {boolean} true if the given word is existing
*/
this.hasWord = function (word, dictionaryKey, matchCase) {
let allElementsString = " " + thiz.getWords(dictionaryKey).join(" ") + " ";
let flag = matchCase ? "" : "i";
return new RegExp(" " + word + " ", flag).test(allElementsString);
};
/**
* Returns word suggestions for a given input. Automatically detects if the last word should be completed (last
* character is not space) or if a next word should be suggested (last character is space).
*
* @param {string} input string for which the predictions should be calculated, e.g. the value of a text input
* where the user is typing.
* @param {Object} [options] options object containing additional properties.
* @param {number} [options.maxPredictions=10] number of suggestions that should be retrieved maximally //TODO maxPredictions
* @param {boolean} [options.applyToInput] if true the suggestions are applied to the original input before being returned
* @return {string[]} list of words that are predictions/suggestions for the given input, ordered by relevance.
*/
this.predict = function (input, options) {
return predictInternal(input, options);
};
/**
* Returns word suggestions for a given input. Last word is assumed to be incomplete and has to be completed.
*
* @param {string} input string for which the predictions should be calculated, e.g. the value of a text input
* where the user is typing.
* @param {Object} [options] options object containing additional properties.
* @param {number} [options.maxPredictions=10] number of suggestions that should be retrieved maximally
* @param {boolean} [options.applyToInput] if true the suggestions are applied to the original input before being returned
* @return {string[]} list of words that are predictions/suggestions for the given input, ordered by relevance.
*/
this.predictCompleteWord = function (input, options) {
return predictInternal(input, options, PREDICT_METHOD_COMPLETE_WORD);
};
/**
* Returns word suggestions for a given input. Last word is assumed to be complete and suggestions for the next words
* are calculated.
*
* @param {string} input string for which the predictions should be calculated, e.g. the value of a text input
* where the user is typing.
* @param {Object} [options] options object containing additional properties.
* @param {number} [options.maxPredictions=10] number of suggestions that should be retrieved maximally
* @param {boolean} [options.applyToInput] if true the suggestions are applied to the original input before being returned
* @return {string[]} list of words that are predictions/suggestions for the given input, ordered by relevance.
*/
this.predictNextWord = function (input, options) {
return predictInternal(input, options, PREDICT_METHOD_NEXT_WORD);
};
/**
* Apply chosen suggestion to a given input, e.g. input = "this is an ap" and chosenPrediction = "Apple" results in
* "this is an Apple". Calling this function automatically refines the saved frequency of the chosen word making it
* more likely to be suggested in the future.
*
* @param {string} input the current input string (e.g. from textfield)
* @param {string} chosenPrediction the chosen prediction which should be applied to the input string
* @param {Object} [options] options object containing additional properties.
* @param {string} [options.addToDictionary] the key of the dictionary where new learned words should be added. If not
* set the dictionary to add is automatically determined.
* @param {number} [options.shouldCompleteLastWord] if true the last word is completed, if false the chosen prediction
* is added as new word. If not set this decision is done automatically (last character is space -> new word).
* @param {boolean} [options.dontLearn] if true the chosen predictions are not added or their frequencies updated
* @return {string} the given input with the applied suggestion
*/
this.applyPrediction = function (input, chosenPrediction, options) {
options = options || {};
let addToDictionary = options.addToDictionary || (thiz.isUsingOnlyDefaultDictionary() ? thiz.DEFAULT_DICTIONARY_KEY : null);
let shouldCompleteLastWord = options.shouldCompleteLastWord !== undefined ? options.shouldCompleteLastWord : !isLastWordCompleted(input);
let dontLearn = options.dontLearn;
let lastWord = getLastWord(input);
let preLastWord = getLastWord(input, 2);
let temp = shouldCompleteLastWord ? input.substring(0, input.lastIndexOf(lastWord)) : input;
if (temp.length > 0 && (!isLastWordCompleted(temp) || new RegExp(PHRASE_END_CHARS_REGEX).test(temp[temp.length - 1]))) {
temp += ' ';
}
if (!dontLearn) {
thiz.learn(chosenPrediction, !shouldCompleteLastWord ? lastWord : preLastWord, addToDictionary);
}
return temp + chosenPrediction + ' ';
};
/**
* Updates the frequencies for given words, making them to be more likely suggested in the future.
*
* @param {string} chosenWord a suggestion/word the user has chosen
* @param {string} [previousWord] the previous word of the chosen suggestion
* @param {string} [addToDictionary] the key of the dictionary where new words should be added. Automatically determined, if not specified.
*/
this.learn = function (chosenWord, previousWord, addToDictionary) {
chosenWord = sanitize(chosenWord);
previousWord = sanitize(previousWord);
let dictKeys = thiz.getDictionaryKeys(true);
addToDictionary = dictKeys.length === 1 ? dictKeys[0] : addToDictionary;
if (dictKeys.length > 0 && (!addToDictionary || !_dicts[addToDictionary])) {
let currentHighscore = 0;
dictKeys.forEach(key => {
let score = 0;
if (thiz.hasWord(chosenWord, key)) {
score += 2;
}
if (thiz.hasWord(previousWord, key)) {
score++;
}
if (score > 0 && score >= currentHighscore) {
currentHighscore = score;
addToDictionary = key;
}
});
}
addToDictionary = addToDictionary || thiz.DEFAULT_DICTIONARY_KEY;
if (!_dicts[addToDictionary]) {
thiz.addDictionary(addToDictionary);
}
Object.keys(_dicts).forEach(key => {
let dict = _dicts[key];
if (!dict.disabled) {
dict.learn(chosenWord, previousWord, addToDictionary === key);
}
});
};
/**
* Learns from input text while the user is typing. This method can be called with e.g. the value of a text input
* field for every character the user is typing.
*
* @param {string} input the text string to learn with. The second last and third last words are learned.
* @param {string} [dictionaryKey] the key of the dictionary where new words should be added. Automatically determined, if not specified.
* @return {boolean} true if something was learned, false if not
*/
this.learnFromInput = function (input, dictionaryKey) {
if (isLastWordCompleted(input)) {
let chosenWord = getLastWord(input, 2);
let previousWord = getLastWord(input, 3);
if (chosenWord && chosenWord !== _lastChosenWord) {
_lastChosenWord = chosenWord;
thiz.learn(chosenWord, previousWord, dictionaryKey);
return true;
}
}
return false;
};
/**
* Learns words and transitions from a given text/phrase.
*
* @param {string} text the text to learn from
* @param {string} [dictionaryKey={@link Predictionary#DEFAULT_DICTIONARY_KEY}] the key of the dictionary where the words should
* be learned/added.
*/
this.learnFromText = function (text, dictionaryKey) {
text = text.replace(/\s\s/g, ' ');
let sentences = text.split(new RegExp(SENTENCE_END_CHARS_REGEX));
sentences.forEach(sentence => {
let words = sentence.split(' ');
for (let i = 0; i < words.length - 1; i++) {
this.learn(words[i + 1], words[i], dictionaryKey);
}
});
};
/**
* Returns a list of currently loaded dictionary keys.
*
* @param {boolean} [onlyEnabled] if true only keys of dictionaries that are enabled are returned. See e.g.
* {@link Predictionary#useDictionaries}
* @return {string[]}
*/
this.getDictionaryKeys = function (onlyEnabled) {
if (onlyEnabled) {
return Object.keys(_dicts).filter(element => !_dicts[element].disabled);
}
return Object.keys(_dicts);
};
/**
* Returns true if only the default dictionary is used (key {@link Predictionary#DEFAULT_DICTIONARY_KEY}).
* @return {boolean}
*/
this.isUsingOnlyDefaultDictionary = function () {
let keys = thiz.getDictionaryKeys();
return keys.length === 0 || (keys.length === 1 && keys[0] === thiz.DEFAULT_DICTIONARY_KEY);
};
function predictInternal(input, options, predictType) {
let predictions = [];
options = options || {};
options.maxPredictions = options.maxPredictions || options.maxPredicitons || 10;
options.applyToInput = options.applyToInput || false;
Object.keys(_dicts).forEach(key => {
let dict = _dicts[key];
if (!dict.disabled) {
let predictFn = predictType === PREDICT_METHOD_NEXT_WORD ? dict.predictNextWord : (predictType === PREDICT_METHOD_COMPLETE_WORD ? dict.predictCompleteWord : null);
predictFn = predictFn || (isLastWordCompleted(input) ? dict.predictNextWord : dict.predictCompleteWord);
predictions = predictions.concat(predictFn(getLastWord(input), options));
}
});
predictions.sort((a, b) => {
if (a.fuzzyMatch !== b.fuzzyMatch) {
return a.fuzzyMatch ? 1 : -1;
}
if (a.frequency !== b.frequency) {
return (a.frequency < b.frequency) ? 1 : -1;
}
if (a.rank !== b.rank) {
if (a.rank && b.rank === undefined) return -1;
if (b.rank && a.rank === undefined) return 1;
return (a.rank < b.rank) ? -1 : 1
}
return 0;
});
let returnArray = [];
for (let i = 0; i < predictions.length && returnArray.length < options.maxPredictions; i++) {
if (returnArray.indexOf(predictions[i].word) === -1) { //de-duplicate
if (options.applyToInput) {
returnArray.push(thiz.applyPrediction(input, predictions[i].word, {dontLearn: true}));
} else {
returnArray.push(predictions[i].word);
}
}
}
return returnArray;
}
}
function getLastWord(text, index) {
index = index || 1;
let words = text.trim().split(new RegExp(INBETWEEN_CHARS_REGEX)).filter(word => !!word);
let returnWord = words[words.length - index] || '';
return returnWord.replace(new RegExp(INBETWEEN_CHARS_REGEX, 'g'), '');
}
function isLastWordCompleted(text) {
return new RegExp(INBETWEEN_CHARS_REGEX).test(text[text.length - 1]);
}
function sanitize(word) {
word = word || '';
return word.replace(/[^a-z0-9áéíóúñüäöß'`´’]/gim, '');
}
/**
* Constructs a new instance of Predictionary
*
* @return {Predictionary}
*/
Predictionary.instance = function () {
return new Predictionary();
};
export default Predictionary;
export function instance() {
return new Predictionary();
}