Skip to content

Instantly share code, notes, and snippets.

@charlesBochet
Last active January 28, 2019 04:37
Show Gist options
  • Save charlesBochet/0c1825b92dd574f6632b760f57a74c84 to your computer and use it in GitHub Desktop.
Save charlesBochet/0c1825b92dd574f6632b760f57a74c84 to your computer and use it in GitHub Desktop.
# coding: utf-8
import nltk
from nltk.tag.stanford import StanfordNERTagger
# Optional: `os` is only used to set JAVA_HOME below.
import os
# Java location handed to NLTK through the JAVA_HOME environment variable.
# The path is machine-specific, so it is only used as a fallback: a JAVA_HOME
# that is already set in the environment is left untouched.
java_path = "/usr/lib/jvm/java-8-oracle"
os.environ.setdefault('JAVA_HOME', java_path)

# French sample text to run through the tagger.
sentence = (
    u"La première Falcon Heavy de l'entreprise SpaceX, "
    "la plus puissante fusée des Etats-Unis jamais "
    "lancée depuis plus de quarante ans, devrait bien "
    "emporter le roadster de l'entrepreneur américain, "
    "mais sur une orbite bien différente. Elon Musk a le sens du spectacle."
)

# Stanford NER jar and the French model, both resolved relative to the
# current working directory.
jar = './stanford-ner-tagger/stanford-ner.jar'
model = './stanford-ner-tagger/trained-ner-model-french.ser.gz'

ner_tagger = StanfordNERTagger(model, jar, encoding='utf8')

# The text is French, so select the French Punkt model for sentence splitting
# instead of word_tokenize's default ('english').
words = nltk.word_tokenize(sentence, language='french')

# Print the tagger's output for the tokenized sentence.
print(ner_tagger.tag(words))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment