WinkNLP has a simple but powerful text classifier using Bayesian analysis. I have working code.
// Extract noun phrases from input.txt using a winkNLP custom-entity pattern
// and write them, one per line, to noun_phrases.txt.
const winkNLP = require('wink-nlp');
const model = require('wink-eng-lite-web-model');
const fs = require('fs');

const nlp = winkNLP(model);
const text = fs.readFileSync('input.txt', 'utf-8');

// A single custom entity named 'nounPhrase'. In winkNLP pattern syntax a
// leading '|' inside brackets makes that term optional, so this reads as:
// optional DET, optional ADJ, then a NOUN or PROPN.
const patterns = [
  {
    name: 'nounPhrase',
    patterns: ['[|DET] [|ADJ] [NOUN|PROPN]'],
  },
];
nlp.learnCustomEntities(patterns);

const doc = nlp.readDoc(text);
const entities = doc.customEntities().out();

// Keep the path in one place so the log message cannot drift from the file
// that is actually written.
const outputPath = 'noun_phrases.txt';
fs.writeFile(outputPath, entities.join('\n'), (err) => {
  if (err) throw err;
  console.log(`The output has been written to ${outputPath}`);
});
Okay, this is clunky, but it works. You will need to run this code twice: once to create a text file called noun_phrases.txt, and once more to create a text file called verb_phrases.txt.
Now, we want to do the same thing, but create a file called verb_phrases.txt. To make this as simple as possible, I've provided a separate file. Clunky, I know.
// Extract verb phrases from input.txt using a winkNLP custom-entity pattern
// and write them, one per line, to verb_phrases.txt.
const winkNLP = require('wink-nlp');
const model = require('wink-eng-lite-web-model');
const fs = require('fs');

const nlp = winkNLP(model);
const text = fs.readFileSync('input.txt', 'utf-8');

// A single custom entity named 'verbPhrase': a run of optional terms followed
// by a mandatory VERB.
// NOTE(review): 'PARTICLE' does not appear to be a Universal POS tag (the tag
// for particles is 'PART'), so that optional term may never match anything.
// Left unchanged here because fixing it would alter the extracted phrases;
// confirm against the winkNLP pattern documentation.
const patterns = [
  {
    name: 'verbPhrase',
    patterns: ['[|ADV] [|PARTICLE] [|ADJ] [|NOUN] [VERB]'],
  },
];
nlp.learnCustomEntities(patterns);

const doc = nlp.readDoc(text);
const entities = doc.customEntities().out();

// Keep the path in one place so the log message cannot drift from the file
// that is actually written.
const outputPath = 'verb_phrases.txt';
fs.writeFile(outputPath, entities.join('\n'), (err) => {
  if (err) throw err;
  console.log(`The output has been written to ${outputPath}`);
});
Obviously, you can just make the edits yourself. I have to preprocess the data to get rid of the clunkiness. This is an epic amount of clunkiness.
// Load the Naive Bayes text classifier and create the instance that will be
// trained below. Declared with const, like every other binding in this script,
// because none of them is ever reassigned.
const Classifier = require('wink-naive-bayes-text-classifier');
const nbc = Classifier();
// Load wink-nlp together with its English language model.
const winkNLP = require('wink-nlp');
const model = require('wink-eng-lite-web-model');
const nlp = winkNLP(model);
// `its` is read off the nlp instance; its members are passed to token.out().
const its = nlp.its;
const fs = require('fs');
/**
 * Read a UTF-8 text file and return its phrases, one per non-empty line.
 *
 * Lines are split on both LF and CRLF endings, surrounding whitespace is
 * trimmed, and blank lines are dropped, so an empty file, a trailing newline
 * or a stray blank line never produces an empty training sample.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string[]} The trimmed, non-empty lines in file order.
 * @throws {Error} Whatever fs.readFileSync throws (e.g. the file is missing).
 */
const readAndPreprocessFile = function (filePath) {
  const fileContents = fs.readFileSync(filePath, 'utf-8');
  return fileContents
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
};
/**
 * Pre-processing task: turn raw text into the list of features to learn from.
 *
 * Keeps only tokens whose type is 'word' and that are not flagged as stop
 * words, then emits each token's stem, prefixed with '!' when the token is
 * flagged as negated.
 *
 * @param {string} text - Raw input text.
 * @returns {string[]} Stems (with '!' prefix on negated tokens), in token order.
 */
const prepTask = function (text) {
  // Words only (no punctuation etc.), and no stop words.
  const isContentWord = (token) => {
    if (token.out(its.type) !== 'word') {
      return false;
    }
    return !token.out(its.stopWordFlag);
  };

  // A negated token is marked by prefixing its stem with '!'.
  const toFeature = (token) => {
    if (token.out(its.negationFlag)) {
      return '!' + token.out(its.stem);
    }
    return token.out(its.stem);
  };

  const features = [];
  const contentWords = nlp.readDoc(text).tokens().filter(isContentWord);
  contentWords.each((token) => {
    features.push(toFeature(token));
  });
  return features;
};
// Register prepTask as the classifier's only pre-processing step.
nbc.definePrepTasks([prepTask]);

// Classifier settings, passed through to defineConfig unchanged.
const classifierConfig = { considerOnlyPresence: true, smoothingFactor: 0.5 };
nbc.defineConfig(classifierConfig);

// Train: every line of noun_phrases.txt is learned under the 'nounPhrase'
// label, then every line of verb_phrases.txt under 'verbPhrase'.
const nounPhrases = readAndPreprocessFile('noun_phrases.txt');
for (const phrase of nounPhrases) {
  nbc.learn(phrase, 'nounPhrase');
}
const verbPhrases = readAndPreprocessFile('verb_phrases.txt');
for (const phrase of verbPhrases) {
  nbc.learn(phrase, 'verbPhrase');
}

// Learning is finished; consolidate before asking for predictions.
nbc.consolidate();

// Try the trained classifier on a sample phrase and print the result.
const prediction = nbc.predict('failing stars');
console.log(prediction);
The expected output is verbPhrase.
No comments:
Post a Comment