wink-expression-extractor.js
Takes "input.txt" and produces "output.json".
const winkNLP = require('wink-nlp');
const model = require('wink-eng-lite-web-model');
const fs = require('fs');
// Build the wink-nlp instance from the loaded language model.
const nlp = winkNLP(model);
// Load the whole input file as a UTF-8 string.
const text = fs.readFileSync('input.txt', 'utf-8');
// Pull out both helpers at once: "its" extracts item properties,
// "as" reduces a collection.
const { its, as } = nlp;
// Custom entity definitions passed to nlp.learnCustomEntities().
// Each pattern is a space-separated sequence of elements; a bracketed element
// lists alternatives separated by "|", and an empty first alternative
// (e.g. "[|DET]") makes that element optional (per wink-nlp's pattern syntax).
const patterns = [
  {
    // Optional determiner, optional adjective, then a noun or proper noun.
    name: 'nounPhrase',
    label: 'nounPhrase',
    patterns: [ '[|DET] [|ADJ] [NOUN|PROPN]' ]
  },
  {
    // Optional adverb, particle, adjective and noun, followed by a verb.
    // The universal POS tag for particles is "PART"; "PARTICLE" is not a
    // POS tag, so the particle slot could never match a particle.
    name: 'verbPhrase',
    label: 'verbPhrase',
    patterns: [ '[|ADV] [|PART] [|ADJ] [|NOUN] [VERB]' ]
  }
];
// Teach the nlp instance the custom entity patterns before reading the text.
nlp.learnCustomEntities(patterns);

// Read the input text and collect its.detail for every custom entity found.
const doc = nlp.readDoc(text);
const entities = doc.customEntities().out(its.detail);

// Serialize first, then write asynchronously; a write error is rethrown
// from the callback, otherwise a confirmation is logged.
const serialized = JSON.stringify(entities);
const reportWrite = (err) => {
  if (err) {
    throw err;
  }
  console.log('The output has been written to output.json');
};
fs.writeFile('output.json', serialized, reportWrite);
"input.txt" can be any text file. The script extracts the noun and verb phrases that match the patterns defined above and writes them to a JSON file.
Bayes_sandbox.js
Takes "output.json"
Expected output: "verbPhrase"
const fs = require('fs');

// Load output.json and parse it into the training data object.
const jsonString = fs.readFileSync('output.json', 'utf-8');
const jsonObj = JSON.parse(jsonString);

// Naive Bayes text classifier: load the factory and create an instance.
const Classifier = require( 'wink-naive-bayes-text-classifier' );
const nbc = Classifier();

// wink-nlp with its English language model, used for text preparation.
const winkNLP = require( 'wink-nlp' );
const model = require( 'wink-eng-lite-web-model' );
const nlp = winkNLP( model );
// "its" helper for reading token properties.
const its = nlp.its;
/**
 * Turns raw text into the list of features used by the classifier.
 *
 * Keeps only tokens whose type is 'word' and that are not stop words, and
 * maps each one to its stem, prefixed with '!' when the token is negated.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Stems of the kept tokens, in document order.
 */
const prepTask = function ( text ) {
  // Words only (no punctuation etc.), minus stop words.
  const isContentWord = ( token ) => {
    if ( token.out( its.type ) !== 'word' ) {
      return false;
    }
    return !token.out( its.stopWordFlag );
  };

  // Stem of the token, marked with '!' when it falls under a negation.
  const toFeature = ( token ) => {
    const negated = token.out( its.negationFlag );
    const stem = token.out( its.stem );
    return negated ? '!' + stem : stem;
  };

  const features = [];
  const contentWords = nlp.readDoc( text ).tokens().filter( isContentWord );
  contentWords.each( ( token ) => {
    features.push( toFeature( token ) );
  } );
  return features;
};
// Use prepTask as the classifier's text preparation step.
nbc.definePrepTasks( [ prepTask ] );
// Configure behavior
nbc.defineConfig( { considerOnlyPresence: true, smoothingFactor: 0.5 } );
// Train: every entry's value is learned under that entry's type.
jsonObj.forEach( ( { value, type } ) => {
  nbc.learn( value, type );
} );
// Consolidate the learnings before asking for a prediction.
nbc.consolidate();
const prediction = nbc.predict( 'failing stars' );
console.log( prediction );
This file uses the data extracted by the first file to train a naive Bayes classifier. It should then correctly predict that "failing stars" is a verb phrase ("verbPhrase").
No comments:
Post a Comment