Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
civitas-digitalis
CivDig-DigitalAssisstant
Experiment-UHH
experiment-uhh-ideadb
Commits
5535acbc
Commit
5535acbc
authored
Jun 18, 2018
by
mahdisellami
Browse files
Added German NER
parent
d21204c2
Changes
2
Hide whitespace changes
Inline
Side-by-side
pom.xml
View file @
5535acbc
...
...
@@ -85,6 +85,12 @@
<version>
3.9.1
</version>
<classifier>
models
</classifier>
<!-- will get the dependent model jars -->
</dependency>
<dependency>
<groupId>
edu.stanford.nlp
</groupId>
<artifactId>
stanford-corenlp
</artifactId>
<version>
3.9.1
</version>
<classifier>
models-german
</classifier>
<!-- German model jar for CoreNLP; keep the version in sync with the other stanford-corenlp entries. Presumably provides the German POS-tagger and NER model files referenced by the /de NER endpoint (TODO confirm against the jar contents). -->
</dependency>
<dependency>
<groupId>
org.apache.opennlp
</groupId>
<artifactId>
opennlp-tools
</artifactId>
...
...
src/main/java/org/tmms/classification/resources/NamedEntityRecognitionResource.java
View file @
5535acbc
package
org.tmms.classification.resources
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Optional
;
import
java.util.Properties
;
import
java.util.concurrent.atomic.AtomicLong
;
import
javax.ws.rs.GET
;
...
...
@@ -16,6 +18,14 @@ import org.tmms.classification.db.NamedEntityRecognitionDAO;
import
com.codahale.metrics.annotation.Timed
;
import
edu.stanford.nlp.io.IOUtils
;
import
edu.stanford.nlp.ling.CoreAnnotations
;
import
edu.stanford.nlp.pipeline.*
;
import
edu.stanford.nlp.simple.*
;
import
edu.stanford.nlp.trees.*
;
import
edu.stanford.nlp.util.CoreMap
;
import
edu.stanford.nlp.util.PropertiesUtils
;
import
edu.stanford.nlp.util.StringUtils
;
import
edu.stanford.nlp.simple.Sentence
;
import
io.dropwizard.hibernate.UnitOfWork
;
import
io.dropwizard.jersey.params.LongParam
;
...
...
@@ -44,13 +54,47 @@ public class NamedEntityRecognitionResource {
/**
 * Runs a CoreNLP pipeline (tokenize, ssplit, pos, lemma, ner) over the input text with
 * the tokenizer language set to "en", collects the NER tags of every sentence in document
 * order and persists them together with the input.
 *
 * @param data the text to analyse, taken from the {@code data} query parameter
 * @return the value returned by {@code nerDAO.insert} for the new record, whose second
 *         constructor argument is the {@code toString()} form of the collected tag list
 */
public NamedEntityRecognition analyseNER(
        @ApiParam(value = "input text data", required = true) @QueryParam("data") String data) {
    // The previous implementation additionally ran the simple-API Sentence#nerTags() here
    // and discarded the result; that redundant second NER pass has been removed.
    Properties englishProperties = new Properties();
    englishProperties.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
    englishProperties.setProperty("tokenize.language", "en");
    // Only the statistical NER output is wanted: numeric classifiers and SUTime are disabled.
    englishProperties.setProperty("ner.applyNumericClassifiers", "false");
    englishProperties.setProperty("ner.useSUTime", "false");

    // NOTE(review): a pipeline object is built on every request; consider sharing one
    // instance if construction turns out to be costly - confirm thread-safety first.
    StanfordCoreNLP pipeline = new StanfordCoreNLP(englishProperties);
    CoreDocument document = new CoreDocument(data);
    pipeline.annotate(document);

    List<String> ners = new ArrayList<>();
    for (CoreSentence sentence : document.sentences()) {
        ners.addAll(sentence.nerTags());
    }
    return nerDAO.insert(new NamedEntityRecognition(data, ners.toString()));
}
// String result = classifier.classifyString(data);
/**
 * German variant of the NER endpoint, served under {@code /de}.
 *
 * Runs a CoreNLP pipeline (tokenize, ssplit, pos, ner) configured with the German
 * POS-tagger and NER model paths below, collects the NER tags of every sentence in
 * document order and persists them together with the input.
 *
 * @param data the text to analyse, taken from the {@code data} query parameter
 * @return the value returned by {@code nerDAO.insert} for the new record, whose second
 *         constructor argument is the {@code toString()} form of the collected tag list
 */
@GET
@Path("/de")
@Timed
@UnitOfWork
@ApiOperation("Returns the Named-Entity-Recognition result of the input text")
public NamedEntityRecognition analyseNERde(
        @ApiParam(value = "input text data", required = true) @QueryParam("data") String data) {
    // The stale "value = nerTags.toString(); return ..." prologue was removed: it referenced
    // a variable that is not declared in this method and returned before the German
    // pipeline below could run, leaving the rest of the body unreachable.
    Properties germanProperties = new Properties();
    germanProperties.setProperty("annotators", "tokenize,ssplit,pos,ner");
    germanProperties.setProperty("tokenize.language", "de");
    germanProperties.setProperty(
            "pos.model", "edu/stanford/nlp/models/pos-tagger/german/german-hgc.tagger");
    germanProperties.setProperty(
            "ner.model",
            "edu/stanford/nlp/models/ner/german.conll.germeval2014.hgc_175m_600.crf.ser.gz");
    // Only the statistical NER output is wanted: numeric classifiers and SUTime are disabled.
    germanProperties.setProperty("ner.applyNumericClassifiers", "false");
    // germanProperties.setProperty("ner.applyFineGrained", "false");
    germanProperties.setProperty("ner.useSUTime", "false");

    // NOTE(review): a pipeline object is built on every request; consider sharing one
    // instance if construction turns out to be costly - confirm thread-safety first.
    StanfordCoreNLP pipeline = new StanfordCoreNLP(germanProperties);
    CoreDocument document = new CoreDocument(data);
    pipeline.annotate(document);

    List<String> ners = new ArrayList<>();
    for (CoreSentence sentence : document.sentences()) {
        ners.addAll(sentence.nerTags());
    }
    return nerDAO.insert(new NamedEntityRecognition(data, ners.toString()));
}
@GET
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment