import stanza


# Install the CoreNLP package into the project-local directory `local/corenlp`.
# Because this is a non-default location, stanza warns that the CORENLP_HOME
# environment variable has to point at it before the client is used.
stanza.install_corenlp(dir="local/corenlp")

2022-08-21 16:14:15 INFO: Installing CoreNLP package into local/corenlp

2022-08-21 16:14:24 WARNING: For customized installation location, please set the `CORENLP_HOME` environment variable to the location of the installation. In Unix, this is done with `export CORENLP_HOME=local/corenlp`.


import os
# Tell stanza where CoreNLP was installed (same directory as passed to
# `install_corenlp`), as requested by the installer's warning.
# NOTE(review): this is a relative path, so it is resolved against the current
# working directory of the notebook kernel.
os.environ["CORENLP_HOME"] = "local/corenlp"


from stanza.server import CoreNLPClient


# Start a CoreNLP server (4G heap, port 9001, quiet mode) and annotate a short
# sample text while the client context is open.  The annotator pipeline ends
# with `coref`, which is what populates `document.corefChain`.
with CoreNLPClient(
    annotators=[
        "tokenize",
        "ssplit",
        "pos",
        "lemma",
        "ner",
        "parse",
        "depparse",
        "coref",
    ],
    memory="4G",
    endpoint="http://localhost:9001",
    be_quiet=True,
) as client:
    # Two-sentence sample; the literal is only split for readability, the
    # resulting string is unchanged.
    text = (
        "Beethoven’s first music teacher was his father. "
        "Although tradition has it that Johann van Beethoven was a harsh instructor, "
        "and that the child Beethoven, “made to stand at the keyboard, was often in tears”, "
        "the Grove Dictionary of Music and Musicians claimed that no solid documentation "
        "supported this."
    )
    document = client.annotate(text)

2022-08-21 16:14:24 INFO: Writing properties to tmp file: corenlp_server-3c09d94e1da34dbe.props
2022-08-21 16:14:24 INFO: Starting server with command: java -Xmx4G -cp local/corenlp/* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 -timeout 60000 -threads 5 -maxCharLength 100000 -quiet True -serverProperties corenlp_server-3c09d94e1da34dbe.props -annotators tokenize,ssplit,pos,lemma,ner,parse,depparse,coref -preload -outputFormat serialized


# Display the coreference chains of the annotated document (notebook output).
# Each chain lists its mentions with sentence index and token span.
document.corefChain

[chainID: 5
mention {
  mentionID: 0
  mentionType: "PROPER"
  number: "SINGULAR"
  gender: "MALE"
  animacy: "ANIMATE"
  beginIndex: 0
  endIndex: 1
  headIndex: 0
  sentenceIndex: 0
  position: 1
}
mention {
  mentionID: 6
  mentionType: "PROPER"
  number: "SINGULAR"
  gender: "MALE"
  animacy: "ANIMATE"
  beginIndex: 17
  endIndex: 18
  headIndex: 17
  sentenceIndex: 1
  position: 2
}
mention {
  mentionID: 5
  mentionType: "PROPER"
  number: "SINGULAR"
  gender: "MALE"
  animacy: "ANIMATE"
  beginIndex: 5
  endIndex: 8
  headIndex: 7
  sentenceIndex: 1
  position: 1
}
representative: 2
, chainID: 11
mention {
  mentionID: 11
  mentionType: "NOMINAL"
  number: "SINGULAR"
  gender: "UNKNOWN"
  animacy: "ANIMATE"
  beginIndex: 15
  endIndex: 18
  headIndex: 16
  sentenceIndex: 1
  position: 7
}
mention {
  mentionID: 2
  mentionType: "NOMINAL"
  number: "SINGULAR"
  gender: "UNKNOWN"
  animacy: "ANIMATE"
  beginIndex: 0
  endIndex: 5
  headIndex: 4
  sentenceIndex: 0
  position: 3
}
mention {
  mentionID: 4
  mentionType: "PRONOMINAL"
  number: "SINGULAR"
  gender: "MALE"
  animacy: "ANIMATE"
  beginIndex: 6
  endIndex: 7
  headIndex: 6
  sentenceIndex: 0
  position: 5
}
representative: 1
]


# NER entity mentions keyed by their `entityMentionIndex`.  Kept because other
# cells may rely on this name, but it must NOT be used to resolve coreference
# mentions: a coref `mentionID` is not an `entityMentionIndex`.  Looking one up
# with the other returned the wrong text for some mentions and then raised
# `KeyError: 11` on the sample document.
mentions_dict = dict()
for sent in document.sentence:
    for m in sent.mentions:
        mentions_dict[m.entityMentionIndex] = m


def _coref_mention_text(doc, mention):
    """Return the surface text of a coreference mention.

    The text is rebuilt from the mention's own coordinates: `sentenceIndex`
    selects the sentence, and `beginIndex`/`endIndex` delimit a token span
    inside that sentence (0-based, end exclusive — e.g. the mention with
    begin=0, end=1 in sentence 0 of the sample is the single token
    "Beethoven").

    Args:
        doc: the annotated CoreNLP document.
        mention: one entry of `chain.mention` from `doc.corefChain`.

    Returns:
        The mention's tokens joined by single spaces.
    """
    tokens = doc.sentence[mention.sentenceIndex].token
    span = tokens[mention.beginIndex:mention.endIndex]
    return " ".join(tok.word for tok in span)


# Print one list of mention texts per coreference chain.
for chain in document.corefChain:
    print([_coref_mention_text(document, m) for m in chain.mention])

['Beethoven', 'Beethoven', 'instructor']

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Input In [7], in <cell line: 5>()
      4         mentions_dict[m.entityMentionIndex] = m
      5 for chain in document.corefChain:
----> 6     print([mentions_dict[m.mentionID].entityMentionText for m in chain.mention])

Input In [7], in <listcomp>(.0)
      4         mentions_dict[m.entityMentionIndex] = m
      5 for chain in document.corefChain:
----> 6     print([mentions_dict[m.mentionID].entityMentionText for m in chain.mention])

KeyError: 11


# Display the mentions of the first coreference chain only (notebook output).
document.corefChain[0].mention

[mentionID: 0
mentionType: "PROPER"
number: "SINGULAR"
gender: "MALE"
animacy: "ANIMATE"
beginIndex: 0
endIndex: 1
headIndex: 0
sentenceIndex: 0
position: 1
, mentionID: 6
mentionType: "PROPER"
number: "SINGULAR"
gender: "MALE"
animacy: "ANIMATE"
beginIndex: 17
endIndex: 18
headIndex: 17
sentenceIndex: 1
position: 2
, mentionID: 5
mentionType: "PROPER"
number: "SINGULAR"
gender: "MALE"
animacy: "ANIMATE"
beginIndex: 5
endIndex: 8
headIndex: 7
sentenceIndex: 1
position: 1
]


# Sets of links (i, j) between mention numbers 1..12.
# NOTE(review): presumably K is the key (gold) link set and R1..R4 are system
# responses for the evaluation-metrics exercise — confirm against the course text.

# R2: one single chain linking every consecutive pair 1-2, 2-3, ..., 11-12.
R2 = {(i, i + 1) for i in range(1, 12)}

# R3: same as R2 with the 5-6 link removed.
R3 = R2 - {(5, 6)}

# K: R3 with the 7-8 link removed as well, i.e. {1..5}, {6, 7}, {8..12}.
K = R3 - {(7, 8)}

# R4: K plus one extra link 5-8.
R4 = K | {(5, 8)}

# R1: no links at all.
R1 = set()

Cours 7 : Coréférence¶

Définitions générales¶

En TAL¶

Données¶

Techniques de traitement¶

Représentations¶

Détection des mentions¶

Détection des chaînes¶

Implémentations¶

Neuralcoref¶

Stanza/CoreNLP¶

Mesures d'évaluation¶

MUC¶

B³¶

CEAF¶

😢 Exercice 😢¶

Cours 7 : Coréférence¶

Définitions générales¶

En TAL¶

Données¶

Techniques de traitement¶

Représentations¶

Détection des mentions¶

Détection des chaînes¶

Implémentations¶

Neuralcoref¶

Stanza/CoreNLP¶

Mesures d'évaluation¶

MUC¶

B³¶

CEAF¶

😢 Exercice 😢¶

Cours 7 : Coréférence¶