Wednesday, February 22, 2023

Bayes Sandbox (Updated)

 wink-expression-extractor.js

Takes "input.txt" and produces "output.json".

const winkNLP = require('wink-nlp');
const model = require('wink-eng-lite-web-model');
const fs = require('fs');

// Read the whole input file up front as UTF-8 text.
const text = fs.readFileSync('input.txt', 'utf-8');

// Instantiate wink-nlp with the loaded language model.
const nlp = winkNLP(model);

// Grab both helpers from the instance in one step:
//   "its" - extracts item properties,
//   "as"  - reduces a collection.
const { its, as } = nlp;

// Custom-entity definitions passed to nlp.learnCustomEntities().
// Each entry names an entity and lists the token patterns that produce it.
// Pattern shorthand: every [...] is one token slot, "|" separates the
// alternatives for that slot, and a leading empty alternative ("[|X]") makes
// the slot optional.
// The slots use Universal POS tags, in which the particle tag is spelled
// PART; "PARTICLE" is not a tag in that set, so the optional particle slot
// of the verb phrase could never match a particle.
const patterns = [
  {
    // Optional determiner, optional adjective, then a common or proper noun.
    name: 'nounPhrase',
    label: 'nounPhrase',
    patterns: [ '[|DET] [|ADJ] [NOUN|PROPN]' ]
  },
  {
    // Optional adverb / particle / adjective / noun, ending in a verb.
    name: 'verbPhrase',
    label: 'verbPhrase',
    patterns: [ '[|ADV] [|PART] [|ADJ] [|NOUN] [VERB]' ]
  }
];

// Teach the pipeline the custom patterns before the document is read.
nlp.learnCustomEntities(patterns);

// Process the input text and collect the custom-entity matches,
// emitted in the its.detail form.
const doc = nlp.readDoc(text);
const matches = doc.customEntities();
const entities = matches.out(its.detail);

// Serialise the matches and write them out asynchronously.
const serialized = JSON.stringify(entities);

// Completion handler for the write: rethrow a failure, otherwise confirm.
const onWritten = (err) => {
  if (err) {
    throw err;
  }
  console.log('The output has been written to output.json');
};

fs.writeFile('output.json', serialized, onWritten);


input.txt can be any plain-text file. The script extracts the noun and verb phrases that match the patterns defined above and writes them to output.json.

Bayes_sandbox.js

Takes "output.json"

Expected output: "verbPhrase"

// Dependencies first, so every module is loaded before any file I/O happens.
const fs = require('fs');
// Load Naive Bayes Text Classifier
const Classifier = require( 'wink-naive-bayes-text-classifier' );
// Load wink nlp and its model
const winkNLP = require( 'wink-nlp' );
// Load language model
const model = require( 'wink-eng-lite-web-model' );

// Read the contents of the file into a string
const jsonString = fs.readFileSync('output.json', 'utf-8');

// Parse the JSON string into a JavaScript object
const jsonObj = JSON.parse(jsonString);

// Instantiate the classifier; the binding is never reassigned, hence const.
const nbc = Classifier();

// Instantiate wink-nlp with the model and keep the "its" helper, which the
// prep task uses to pick token properties.
const nlp = winkNLP( model );
const its = nlp.its;

/**
 * Turns raw text into the list of features used by the classifier.
 *
 * Keeps only tokens whose type is 'word' and that are not flagged as stop
 * words, and maps each kept token to its stem; a token flagged as negated
 * gets its stem prefixed with '!'.
 *
 * @param {string} text - Text to tokenise.
 * @returns {string[]} Stems of the kept tokens, in document order.
 */
const prepTask = function ( text ) {
  // A token is kept when it is a word and not a stop word.
  const isContentWord = ( token ) => {
    if ( token.out( its.type ) !== 'word' ) {
      return false;
    }
    return !token.out( its.stopWordFlag );
  };

  const stems = [];
  const contentWords = nlp.readDoc( text ).tokens().filter( isContentWord );

  contentWords.each( ( token ) => {
    // Mark negated words so they become distinct features.
    if ( token.out( its.negationFlag ) ) {
      stems.push( '!' + token.out( its.stem ) );
    } else {
      stems.push( token.out( its.stem ) );
    }
  } );

  return stems;
};
// Register the prep task that converts text into stemmed tokens.
nbc.definePrepTasks( [ prepTask ] );

// Configure behavior
const classifierConfig = { considerOnlyPresence: true, smoothingFactor: 0.5 };
nbc.defineConfig( classifierConfig );

// Train! Each extracted entity supplies its text as the sample and its
// type as the label.
for ( const entity of jsonObj ) {
  nbc.learn( entity.value, entity.type );
}

// Finish training, then classify a sample phrase and print the result.
nbc.consolidate();

const prediction = nbc.predict( 'failing stars' );
console.log( prediction );



This file uses the data extracted by the first script to train a naive Bayes classifier. It should then correctly predict that "failing stars" is a verb phrase (label "verbPhrase").

No comments:

Post a Comment