如何将词性标注器与 SentiWordNet 算法集成
How To integrate POS tagger with SentiWordNet algorithm
Here is my SentiWordNet algorithm:
/**
 * Sentiment lexicon loaded from a SentiWordNet-style tab-separated file.
 *
 * <p>For every data line, column 0 is used as the part-of-speech suffix, column 2 minus
 * column 3 is the sense score, and column 4 is a space-separated list of {@code word#rank}
 * terms. Each dictionary key has the form {@code word#pos} and maps to the rank-weighted
 * average of the sense scores, where the sense with rank k carries weight 1/k.
 */
public class SWN3 {
    private static final String DEFAULT_PATH = "C:/Users/RAHUL/Desktop/SWN/SentiWordNet_3.0.0.txt";

    /** Part-of-speech suffixes summed by {@link #extract(String)}, in lookup order. */
    private static final String[] POS_SUFFIXES = {"#n", "#a", "#r", "#v"};

    private final String pathToSWN;
    private HashMap<String, Double> _dict;

    /** Loads the lexicon from the default location. */
    public SWN3() {
        this(DEFAULT_PATH);
    }

    /**
     * Loads the lexicon from {@code pathToSWN}.
     *
     * <p>Loading is best effort: if the file cannot be read or a data line is malformed, the
     * error is reported on stderr and the dictionary is left empty, so every lookup yields 0.
     *
     * @param pathToSWN path of the lexicon file
     */
    public SWN3(String pathToSWN) {
        this.pathToSWN = pathToSWN;
        _dict = new HashMap<>();
        HashMap<String, Vector<Double>> sensesByKey = new HashMap<>();
        // try-with-resources: the reader is closed even when parsing fails.
        try (BufferedReader csv = new BufferedReader(new FileReader(pathToSWN))) {
            String line;
            while ((line = csv.readLine()) != null) {
                String trimmed = line.trim();
                // Header/comment lines start with '#'; they carry no scores.
                if (trimmed.isEmpty() || trimmed.startsWith("#")) {
                    continue;
                }
                String[] data = line.split("\t");
                double score = Double.parseDouble(data[2]) - Double.parseDouble(data[3]);
                for (String term : data[4].split(" ")) {
                    String[] w_n = term.split("#");
                    String key = w_n[0] + "#" + data[0];
                    int index = Integer.parseInt(w_n[1]) - 1;
                    Vector<Double> senses = sensesByKey.get(key);
                    if (senses == null) {
                        senses = new Vector<>();
                        sensesByKey.put(key, senses);
                    }
                    // Pad up to the rank, then overwrite the slot. Inserting here would shift
                    // senses that were read earlier and leave stale zero padding behind.
                    while (senses.size() <= index) {
                        senses.add(0.0);
                    }
                    senses.set(index, score);
                }
            }
            for (String key : sensesByKey.keySet()) {
                Vector<Double> senses = sensesByKey.get(key);
                double weighted = 0.0;
                double norm = 0.0;
                for (int i = 0; i < senses.size(); i++) {
                    double weight = 1.0 / (i + 1);
                    weighted += weight * senses.get(i);
                    norm += weight;
                }
                _dict.put(key, weighted / norm);
            }
        } catch (java.io.IOException | RuntimeException e) {
            System.err.println("Could not load sentiment lexicon from " + pathToSWN);
            e.printStackTrace();
        }
    }

    /**
     * Returns the sum of the scores stored for {@code word} under the noun, adjective,
     * adverb and verb suffixes.
     *
     * @param word the word to look up, exactly as it is spelled in the lexicon
     * @return the summed score; 0 when the word has no entry (never {@code null})
     */
    public Double extract(String word) {
        double total = 0.0;
        for (String suffix : POS_SUFFIXES) {
            Double score = _dict.get(word + suffix);
            if (score != null) {
                total += score;
            }
        }
        return total;
    }

    /**
     * Scores a sentence by summing {@link #extract(String)} over its whitespace-separated
     * words (non-letter characters are stripped from each word first) and maps the total
     * to a label.
     *
     * <p>Note: every call loads the lexicon again from the default path.
     *
     * @param stri the sentence to classify
     * @return the sentiment label, or an empty string when no threshold below matches
     */
    public static String SentiWord(String stri) {
        SWN3 test = new SWN3();
        double totalScore = 0;
        for (String word : stri.split("\\s+")) {
            totalScore += test.extract(word.replaceAll("([^a-zA-Z\\s])", ""));
        }
        String sent = "";
        if (totalScore >= 0.75) {
            sent = "strong_positive";
        } else if (totalScore > 0.25) {
            sent = "positive";
        }
        // .... the remaining thresholds were elided in the original post
        return sent;
    }
}
这是我的 Pos Tagger 方法:
/**
 * Minimal tagger demo: loads a tagger model, tags one fixed sentence and prints the
 * sentence followed by its tagged form.
 */
public class TagText {
    public static void main(String[] args) throws IOException,
            ClassNotFoundException {
        final String modelPath = "taggers/english-left3words-distsim.tagger";
        final String input = "This is a sample text";

        // Build the tagger from the model file, then tag the whole sentence in one call.
        MaxentTagger posTagger = new MaxentTagger(modelPath);
        String output = posTagger.tagString(input);

        // Show the raw sentence and its tagged counterpart on the console.
        System.out.println("Input: " + input);
        System.out.println("Output: " + output);
    }
}
我需要将 POS Tagger 与 SentiWordNet 集成。我正在尝试制作一个用于情感分析的系统。目前这段 SentiWordNet 代码在不使用词性标注的情况下可以正常运行,但给出的结果并不好。我实在想不出该怎么做,请帮忙。
您可以像这样调整 SWN3 中的 extract 方法:
/**
 * Looks up the score of {@code word} for the part of speech named by a tagger tag.
 *
 * <p>The tag is matched by prefix: {@code NN*} selects the {@code #n} entry, {@code VB*}
 * selects {@code #v}, {@code JJ*} selects {@code #a} and {@code RB*} selects {@code #r}.
 * Prefix matching (rather than substring matching) keeps tags that merely contain one of
 * these prefixes, such as {@code WRB}, from being treated as adverbs.
 *
 * @param word the word to look up
 * @param tail the bare tag of the word (for example {@code "JJ"}); surrounding whitespace
 *     is ignored
 * @return the score stored under {@code word#pos}, or {@code null} when {@code tail} is
 *     {@code null}, the tag maps to none of the four classes, or the word has no entry
 */
public Double extract(String word, String tail) {
    if (tail == null) {
        return null;
    }
    String tag = tail.trim();
    String suffix;
    if (tag.startsWith("NN")) {
        suffix = "#n";
    } else if (tag.startsWith("VB")) {
        suffix = "#v";
    } else if (tag.startsWith("JJ")) {
        suffix = "#a";
    } else if (tag.startsWith("RB")) {
        suffix = "#r";
    } else {
        return null;
    }
    return _dict.get(word + suffix);
}
它将词性标签(tags)映射到 SentiWordNet 中定义的词性类型。我建议像这样更改您的 main 方法:
public static void main(String[] args) {
MaxentTagger tagger = new MaxentTagger("files/english-left3words-distsim.tagger");
//String sample = "This is a sample text";
String sample = "It works much better with this great example!";
sample = sample.replaceAll("([^a-zA-Z\s])", "");
String[] words = sample.split("\s+");
String taggedSample = tagger.tagString(sample);
String[] taggedWords = taggedSample.split("\s+");
System.out.println(tagger.tagString(sample));
double totalScore = 0;
SWN3 test = new SWN3();
System.out.println("-----------");
for (int i=0; i<taggedWords.length;i++) {
String tail = taggedWords[i].substring(words[i].length() + 1);
Double score = null;
if(tail!=null{
score = test.extract(words[i], tail);
System.out.println(taggedWords[i] + "\t" + words[i] + "\t" + tail + "\t" + score);
}
if (score == null)
continue;
totalScore += score;
}
System.out.println("-----------");
System.out.println(totalScore);
}
我在 sample 中使用了另一个句子,它的效果更好。请注意,对整个句子进行标注与对单词逐个进行标注,可能会得到不同的结果。
希望对您有所帮助。
Here is my SentiWordNet algorithm:
/**
 * Sentiment lexicon loaded from a SentiWordNet-style tab-separated file.
 *
 * <p>For every data line, column 0 is used as the part-of-speech suffix, column 2 minus
 * column 3 is the sense score, and column 4 is a space-separated list of {@code word#rank}
 * terms. Each dictionary key has the form {@code word#pos} and maps to the rank-weighted
 * average of the sense scores, where the sense with rank k carries weight 1/k.
 */
public class SWN3 {
    private static final String DEFAULT_PATH = "C:/Users/RAHUL/Desktop/SWN/SentiWordNet_3.0.0.txt";

    /** Part-of-speech suffixes summed by {@link #extract(String)}, in lookup order. */
    private static final String[] POS_SUFFIXES = {"#n", "#a", "#r", "#v"};

    private final String pathToSWN;
    private HashMap<String, Double> _dict;

    /** Loads the lexicon from the default location. */
    public SWN3() {
        this(DEFAULT_PATH);
    }

    /**
     * Loads the lexicon from {@code pathToSWN}.
     *
     * <p>Loading is best effort: if the file cannot be read or a data line is malformed, the
     * error is reported on stderr and the dictionary is left empty, so every lookup yields 0.
     *
     * @param pathToSWN path of the lexicon file
     */
    public SWN3(String pathToSWN) {
        this.pathToSWN = pathToSWN;
        _dict = new HashMap<>();
        HashMap<String, Vector<Double>> sensesByKey = new HashMap<>();
        // try-with-resources: the reader is closed even when parsing fails.
        try (BufferedReader csv = new BufferedReader(new FileReader(pathToSWN))) {
            String line;
            while ((line = csv.readLine()) != null) {
                String trimmed = line.trim();
                // Header/comment lines start with '#'; they carry no scores.
                if (trimmed.isEmpty() || trimmed.startsWith("#")) {
                    continue;
                }
                String[] data = line.split("\t");
                double score = Double.parseDouble(data[2]) - Double.parseDouble(data[3]);
                for (String term : data[4].split(" ")) {
                    String[] w_n = term.split("#");
                    String key = w_n[0] + "#" + data[0];
                    int index = Integer.parseInt(w_n[1]) - 1;
                    Vector<Double> senses = sensesByKey.get(key);
                    if (senses == null) {
                        senses = new Vector<>();
                        sensesByKey.put(key, senses);
                    }
                    // Pad up to the rank, then overwrite the slot. Inserting here would shift
                    // senses that were read earlier and leave stale zero padding behind.
                    while (senses.size() <= index) {
                        senses.add(0.0);
                    }
                    senses.set(index, score);
                }
            }
            for (String key : sensesByKey.keySet()) {
                Vector<Double> senses = sensesByKey.get(key);
                double weighted = 0.0;
                double norm = 0.0;
                for (int i = 0; i < senses.size(); i++) {
                    double weight = 1.0 / (i + 1);
                    weighted += weight * senses.get(i);
                    norm += weight;
                }
                _dict.put(key, weighted / norm);
            }
        } catch (java.io.IOException | RuntimeException e) {
            System.err.println("Could not load sentiment lexicon from " + pathToSWN);
            e.printStackTrace();
        }
    }

    /**
     * Returns the sum of the scores stored for {@code word} under the noun, adjective,
     * adverb and verb suffixes.
     *
     * @param word the word to look up, exactly as it is spelled in the lexicon
     * @return the summed score; 0 when the word has no entry (never {@code null})
     */
    public Double extract(String word) {
        double total = 0.0;
        for (String suffix : POS_SUFFIXES) {
            Double score = _dict.get(word + suffix);
            if (score != null) {
                total += score;
            }
        }
        return total;
    }

    /**
     * Scores a sentence by summing {@link #extract(String)} over its whitespace-separated
     * words (non-letter characters are stripped from each word first) and maps the total
     * to a label.
     *
     * <p>Note: every call loads the lexicon again from the default path.
     *
     * @param stri the sentence to classify
     * @return the sentiment label, or an empty string when no threshold below matches
     */
    public static String SentiWord(String stri) {
        SWN3 test = new SWN3();
        double totalScore = 0;
        for (String word : stri.split("\\s+")) {
            totalScore += test.extract(word.replaceAll("([^a-zA-Z\\s])", ""));
        }
        String sent = "";
        if (totalScore >= 0.75) {
            sent = "strong_positive";
        } else if (totalScore > 0.25) {
            sent = "positive";
        }
        // .... the remaining thresholds were elided in the original post
        return sent;
    }
}
这是我的 Pos Tagger 方法:
/**
 * Minimal tagger demo: loads a tagger model, tags one fixed sentence and prints the
 * sentence followed by its tagged form.
 */
public class TagText {
    public static void main(String[] args) throws IOException,
            ClassNotFoundException {
        final String modelPath = "taggers/english-left3words-distsim.tagger";
        final String input = "This is a sample text";

        // Build the tagger from the model file, then tag the whole sentence in one call.
        MaxentTagger posTagger = new MaxentTagger(modelPath);
        String output = posTagger.tagString(input);

        // Show the raw sentence and its tagged counterpart on the console.
        System.out.println("Input: " + input);
        System.out.println("Output: " + output);
    }
}
我需要将 POS Tagger 与 SentiWordNet 集成。我正在尝试制作一个用于情感分析的系统。目前这段 SentiWordNet 代码在不使用词性标注的情况下可以正常运行,但给出的结果并不好。我实在想不出该怎么做,请帮忙。
您可以像这样调整 SWN3 中的 extract 方法:
/**
 * Looks up the score of {@code word} for the part of speech named by a tagger tag.
 *
 * <p>The tag is matched by prefix: {@code NN*} selects the {@code #n} entry, {@code VB*}
 * selects {@code #v}, {@code JJ*} selects {@code #a} and {@code RB*} selects {@code #r}.
 * Prefix matching (rather than substring matching) keeps tags that merely contain one of
 * these prefixes, such as {@code WRB}, from being treated as adverbs.
 *
 * @param word the word to look up
 * @param tail the bare tag of the word (for example {@code "JJ"}); surrounding whitespace
 *     is ignored
 * @return the score stored under {@code word#pos}, or {@code null} when {@code tail} is
 *     {@code null}, the tag maps to none of the four classes, or the word has no entry
 */
public Double extract(String word, String tail) {
    if (tail == null) {
        return null;
    }
    String tag = tail.trim();
    String suffix;
    if (tag.startsWith("NN")) {
        suffix = "#n";
    } else if (tag.startsWith("VB")) {
        suffix = "#v";
    } else if (tag.startsWith("JJ")) {
        suffix = "#a";
    } else if (tag.startsWith("RB")) {
        suffix = "#r";
    } else {
        return null;
    }
    return _dict.get(word + suffix);
}
它将词性标签(tags)映射到 SentiWordNet 中定义的词性类型。我建议像这样更改您的 main 方法:
public static void main(String[] args) {
MaxentTagger tagger = new MaxentTagger("files/english-left3words-distsim.tagger");
//String sample = "This is a sample text";
String sample = "It works much better with this great example!";
sample = sample.replaceAll("([^a-zA-Z\s])", "");
String[] words = sample.split("\s+");
String taggedSample = tagger.tagString(sample);
String[] taggedWords = taggedSample.split("\s+");
System.out.println(tagger.tagString(sample));
double totalScore = 0;
SWN3 test = new SWN3();
System.out.println("-----------");
for (int i=0; i<taggedWords.length;i++) {
String tail = taggedWords[i].substring(words[i].length() + 1);
Double score = null;
if(tail!=null{
score = test.extract(words[i], tail);
System.out.println(taggedWords[i] + "\t" + words[i] + "\t" + tail + "\t" + score);
}
if (score == null)
continue;
totalScore += score;
}
System.out.println("-----------");
System.out.println(totalScore);
}
我在 sample 中使用了另一个句子,它的效果更好。请注意,对整个句子进行标注与对单词逐个进行标注,可能会得到不同的结果。
希望对您有所帮助。