Source code

Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
 * Binary naive Bayes text tagger that scores TF-IDF feature vectors.
 *
 * The supplied model is read for these properties: `vocab_idfs`, `classes`
 * (each entry providing `log_prior` and `feature_log_probs`),
 * `positive_class_id`, `positive_class_label` and
 * `positive_class_threshold_log_prob`.
 */
export class NaiveBayesTextTagger {
  /**
   * @param {object} model
   *   The classifier model (see the class description for the fields used).
   * @param {function} toksToTfIdfVector
   *   Called as `toksToTfIdfVector(tokens, model.vocab_idfs)`. Must return an
   *   object whose values are `[termId, tfidf]` pairs.
   */
  constructor(model, toksToTfIdfVector) {
    this.model = model;
    this.toksToTfIdfVector = toksToTfIdfVector;
  }

  /**
   * Determines if the tokenized text belongs to class according to binary
   * naive Bayes classifier. Returns an object containing the class label
   * ("label"), the normalized log probability ("logProb") of the most likely
   * class, and a boolean ("confident") that is true only when the positive
   * class is the most likely one and its log probability exceeds the model's
   * `positive_class_threshold_log_prob`. If the positive class is more
   * likely, then "label" is the positive class label. If the negative class
   * is matched, then "label" is set to null.
   *
   * @param {Array} tokens
   *   Tokens handed unchanged to `toksToTfIdfVector`.
   * @returns {{label: (string|null), logProb: number, confident: boolean}}
   *   For a model without any classes the result is
   *   `{ label: null, logProb: -Infinity, confident: false }`.
   */
  tagTokens(tokens) {
    const fv = this.toksToTfIdfVector(tokens, this.model.vocab_idfs);
    // The feature pairs are the same for every class; extract them once.
    const features = Object.values(fv);

    const classLogProbs = [];
    let bestLogProb = null;
    let bestClassId = -1;

    for (let classId = 0; classId < this.model.classes.length; classId++) {
      const classModel = this.model.classes[classId];
      let classLogProb = classModel.log_prior;
      // dot fv with the class model
      for (const [termId, tfidf] of features) {
        classLogProb += tfidf * classModel.feature_log_probs[termId];
      }
      classLogProbs.push(classLogProb);
      if (bestLogProb === null || classLogProb > bestLogProb) {
        bestLogProb = classLogProb;
        bestClassId = classId;
      }
    }

    // No classes means nothing can be matched: report probability zero
    // instead of doing arithmetic on a null score.
    if (bestLogProb === null) {
      return { label: null, logProb: -Infinity, confident: false };
    }

    // Normalize by P(x) using the log-sum-exp trick. Exponentiating the raw
    // log probabilities underflows to 0 once they are very negative, which
    // would make log(P(x)) equal -Infinity and the normalized result
    // +Infinity. Shifting every term by the maximum keeps the largest
    // exponent at exp(0) = 1, so the sum is always at least 1.
    let shiftedSum = 0.0;
    for (const classLogProb of classLogProbs) {
      shiftedSum += Math.exp(classLogProb - bestLogProb);
    }
    // log P(x) = bestLogProb + log(shiftedSum), therefore
    // bestLogProb - log P(x) = -log(shiftedSum). Written as a subtraction
    // from 0 so that a single-class model yields 0 rather than -0.
    const normalizedLogProb = 0 - Math.log(shiftedSum);

    const isPositive = bestClassId === this.model.positive_class_id;
    const confident =
      isPositive &&
      normalizedLogProb > this.model.positive_class_threshold_log_prob;

    return {
      label: isPositive ? this.model.positive_class_label : null,
      logProb: normalizedLogProb,
      confident,
    };
  }
}