Commit 5535acbc authored by mahdisellami
Browse files

Added German NER

parent d21204c2
......@@ -85,6 +85,12 @@
<version>3.9.1</version>
<classifier>models</classifier> <!-- will get the dependent model jars -->
</dependency>
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>3.9.1</version>
<classifier>models-german</classifier> <!-- pulls in the German models jar (presumably the German POS tagger and NER model files; verify) -->
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
<artifactId>opennlp-tools</artifactId>
......
package org.tmms.classification.resources;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicLong;
import javax.ws.rs.GET;
......@@ -16,6 +18,14 @@ import org.tmms.classification.db.NamedEntityRecognitionDAO;
import com.codahale.metrics.annotation.Timed;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.simple.*;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.simple.Sentence;
import io.dropwizard.hibernate.UnitOfWork;
import io.dropwizard.jersey.params.LongParam;
......@@ -44,13 +54,47 @@ public class NamedEntityRecognitionResource {
public NamedEntityRecognition analyseNER(
        @ApiParam(value = "input text data", required = true) @QueryParam("data") String data) {
    // Runs a CoreNLP pipeline configured for English over the input text, collects the
    // NER tag of every token across all sentences, and stores the input together with
    // the tag list (rendered via List.toString()) through nerDAO. The value returned by
    // nerDAO.insert(...) is handed back to the caller.
    Properties englishProperties = new Properties();
    englishProperties.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
    englishProperties.setProperty("tokenize.language", "en");
    // Numeric classifiers and SUTime are switched off for this pipeline.
    englishProperties.setProperty("ner.applyNumericClassifiers", "false");
    englishProperties.setProperty("ner.useSUTime", "false");

    // NOTE(review): a new pipeline is constructed on every request; if construction turns
    // out to be costly, consider building it once and reusing it.
    StanfordCoreNLP pipeline = new StanfordCoreNLP(englishProperties);
    CoreDocument document = new CoreDocument(data);
    pipeline.annotate(document);

    // Flatten the per-sentence tag lists into one list, preserving token order.
    List<String> ners = new ArrayList<>();
    for (CoreSentence sentence : document.sentences()) {
        ners.addAll(sentence.nerTags());
    }
    return nerDAO.insert(new NamedEntityRecognition(data, ners.toString()));
}
// String result = classifier.classifyString(data);
/**
 * Named-entity recognition for German input, exposed as a GET endpoint under the
 * {@code /de} sub-path.
 *
 * <p>Builds a CoreNLP pipeline with the German POS tagger model and the German CRF
 * NER model configured below, annotates the input, and collects the NER tag of every
 * token across all sentences. The input text and the tag list (rendered via
 * {@code List.toString()}) are stored through {@code nerDAO}.
 *
 * @param data the text to analyse, taken from the {@code data} query parameter
 * @return the value returned by {@code nerDAO.insert(...)} for the stored result
 */
@GET()
@Path("/de")
@Timed
@UnitOfWork
@ApiOperation("Returns the Named-Entity-Recognition result of the input text")
public NamedEntityRecognition analyseNERde(
        @ApiParam(value = "input text data", required = true) @QueryParam("data") String data) {
    Properties germanProperties = new Properties();
    germanProperties.setProperty("annotators", "tokenize,ssplit,pos,ner");
    germanProperties.setProperty("tokenize.language", "de");
    germanProperties.setProperty("pos.model", "edu/stanford/nlp/models/pos-tagger/german/german-hgc.tagger");
    germanProperties.setProperty("ner.model", "edu/stanford/nlp/models/ner/german.conll.germeval2014.hgc_175m_600.crf.ser.gz");
    // Numeric classifiers and SUTime are switched off for this pipeline.
    germanProperties.setProperty("ner.applyNumericClassifiers", "false");
    // germanProperties.setProperty("ner.applyFineGrained", "false");
    germanProperties.setProperty("ner.useSUTime", "false");

    // NOTE(review): a new pipeline is constructed on every request; if construction turns
    // out to be costly, consider building it once and reusing it.
    StanfordCoreNLP pipeline = new StanfordCoreNLP(germanProperties);
    CoreDocument document = new CoreDocument(data);
    pipeline.annotate(document);

    // Flatten the per-sentence tag lists into one list, preserving token order.
    List<String> ners = new ArrayList<>();
    for (CoreSentence sentence : document.sentences()) {
        ners.addAll(sentence.nerTags());
    }
    return nerDAO.insert(new NamedEntityRecognition(data, ners.toString()));
}
@GET
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment