from IPython.display import display, Markdown
%pip install -U wn
Collecting wn Downloading wn-0.9.1-py3-none-any.whl (75 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 75.3/75.3 kB 6.7 MB/s eta 0:00:00 Requirement already satisfied: requests in /opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages (from wn) (2.28.1) Collecting tomli Downloading tomli-2.0.1-py3-none-any.whl (12 kB) Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages (from requests->wn) (1.26.11) Requirement already satisfied: charset-normalizer<3,>=2 in /opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages (from requests->wn) (2.1.1) Requirement already satisfied: certifi>=2017.4.17 in /opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages (from requests->wn) (2022.6.15) Requirement already satisfied: idna<4,>=2.5 in /opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages (from requests->wn) (3.3) Installing collected packages: tomli, wn Successfully installed tomli-2.0.1 wn-0.9.1 Note: you may need to restart the kernel to use updated packages.
import wn
# Fetch the 2020 edition of the English WordNet and add it to wn's local data store.
wn.download("ewn:2020")
Download [##############################] (13643357/13643357 bytes) Complete Read [##############################] (1248496/1248496) Added ewn:2020 (English WordNet)
PosixPath('/home/runner/.wn_data/downloads/3d808000775f658597bc71e9d4870059ccc7965f')
# Lexical entries matching the form "cat", across parts of speech
# (the result also includes the upper-case entry "CAT").
wn.words("cat")
[Word('ewn-cat-n'), Word('ewn-cat-v'), Word('ewn-CAT-n')]
# Synsets (sets of synonymous words) that have a member word matching "cat".
wn.synsets("cat")
[Synset('ewn-02124272-n'), Synset('ewn-10172934-n'), Synset('ewn-09919605-n'), Synset('ewn-03614083-n'), Synset('ewn-02989061-n'), Synset('ewn-02986962-n'), Synset('ewn-02130460-n'), Synset('ewn-01414524-v'), Synset('ewn-00076153-v'), Synset('ewn-00903174-n')]
# Show the member words of every synset that contains "cat".
for ss in wn.synsets("cat"):
    print(ss.words())
[Word('ewn-cat-n'), Word('ewn-true_cat-n')] [Word('ewn-cat-n'), Word('ewn-guy-n'), Word('ewn-hombre-n'), Word('ewn-sod-n'), Word('ewn-bozo-n')] [Word('ewn-cat-n')] [Word('ewn-African_tea-n'), Word('ewn-qat-n'), Word('ewn-Arabian_tea-n'), Word('ewn-cat-n'), Word('ewn-khat-n'), Word('ewn-quat-n'), Word('ewn-kat-n')] [Word('ewn-cat-n'), Word('ewn-cat-o-ap--nine-tails-n')] [Word('ewn-cat-n'), Word('ewn-Caterpillar-n')] [Word('ewn-cat-n'), Word('ewn-big_cat-n')] [Word('ewn-cat-v')] [Word('ewn-cast-v'), Word('ewn-purge-v'), Word('ewn-chuck-v'), Word('ewn-honk-v'), Word('ewn-regurgitate-v'), Word('ewn-cat-v'), Word('ewn-disgorge-v'), Word('ewn-barf-v'), Word('ewn-regorge-v'), Word('ewn-spew-v'), Word('ewn-sick-v'), Word('ewn-vomit-v'), Word('ewn-puke-v'), Word('ewn-be_sick-v'), Word('ewn-upchuck-v'), Word('ewn-retch-v'), Word('ewn-spue-v'), Word('ewn-vomit_up-v'), Word('ewn-throw_up-v')] [Word('ewn-CT-n'), Word('ewn-computerized_axial_tomography-n'), Word('ewn-CAT-n'), Word('ewn-computed_axial_tomography-n'), Word('ewn-computerized_tomography-n'), Word('ewn-computed_tomography-n')]
# For every "cat" synset, print its definition, then the lemmas of its member
# words, then a blank line to separate it from the next synset.
for ss in wn.synsets("cat"):
    print(ss.definition())
    print([w.lemma() for w in ss.words()])
    print()
feline mammal usually having thick soft fur and no ability to roar: domestic cats; wildcats ['cat', 'true cat'] an informal term for a youth or man ['cat', 'guy', 'hombre', 'sod', 'bozo'] a spiteful woman gossip ['cat'] the leaves of the shrub Catha edulis which are chewed like tobacco or used to make tea; has the effect of a euphoric stimulant ['African tea', 'qat', 'Arabian tea', 'cat', 'khat', 'quat', 'kat'] a whip with nine knotted cords ['cat', "cat-o'-nine-tails"] a large tracked vehicle that is propelled by two endless metal belts; frequently used for moving earth in construction and farm work ['cat', 'Caterpillar'] any of several large cats typically able to roar and living in the wild ['cat', 'big cat'] beat with a cat-o'-nine-tails ['cat'] eject the contents of the stomach through the mouth ['cast', 'purge', 'chuck', 'honk', 'regurgitate', 'cat', 'disgorge', 'barf', 'regorge', 'spew', 'sick', 'vomit', 'puke', 'be sick', 'upchuck', 'retch', 'spue', 'vomit up', 'throw up'] a method of examining body organs by scanning them with X rays and using a computer to construct a series of cross-sectional scans along a single axis ['CT', 'computerized axial tomography', 'CAT', 'computed axial tomography', 'computerized tomography', 'computed tomography']
# Words of the first hypernym (a more general concept) of the first "cat" synset.
wn.synsets("cat")[0].hypernyms()[0].words()
[Word('ewn-feline-n'), Word('ewn-felid-n')]
# Words of the first hyponym (a more specific concept) of the first "cat" synset.
wn.synsets("cat")[0].hyponyms()[0].words()
[Word('ewn-Felis_domesticus-n'), Word('ewn-Felis_catus-n'), Word('ewn-house_cat-n'), Word('ewn-domestic_cat-n')]
# "mero_member" is the member-meronym relation: words of the first synset
# listed as a member of the first "alphabet" synset.
wn.synsets("alphabet")[0].get_related("mero_member")[0].words()
[Word('ewn-alphabetic_character-n'), Word('ewn-letter_of_the_alphabet-n'), Word('ewn-letter-n')]
# "holo_member" is the member-holonym relation: words of the first synset
# that the first "human" synset is listed as a member of.
wn.synsets("human")[0].get_related("holo_member")[0].words()
[Word('ewn-genus_Homo-n')]
# Member words of each synset on the shortest path that leads from the first
# "bagpipe" synset to the first "cat" synset.
[
    step.words()
    for step in wn.synsets("bagpipe")[0].shortest_path(wn.synsets("cat")[0])
]
[[Word('ewn-pipe-n')], [Word('ewn-wind-n'), Word('ewn-wind_instrument-n')], [Word('ewn-instrument-n'), Word('ewn-musical_instrument-n')], [Word('ewn-device-n')], [Word('ewn-instrumentality-n'), Word('ewn-instrumentation-n')], [Word('ewn-artifact-n'), Word('ewn-artefact-n')], [Word('ewn-whole-n'), Word('ewn-unit-n')], [Word('ewn-animate_thing-n'), Word('ewn-living_thing-n')], [Word('ewn-being-n'), Word('ewn-organism-n')], [Word('ewn-fauna-n'), Word('ewn-beast-n'), Word('ewn-animate_being-n'), Word('ewn-brute-n'), Word('ewn-creature-n'), Word('ewn-animal-n')], [Word('ewn-chordate-n')], [Word('ewn-craniate-n'), Word('ewn-vertebrate-n')], [Word('ewn-mammalian-n'), Word('ewn-mammal-n')], [Word('ewn-eutherian_mammal-n'), Word('ewn-placental-n'), Word('ewn-placental_mammal-n'), Word('ewn-eutherian-n')], [Word('ewn-carnivore-n')], [Word('ewn-feline-n'), Word('ewn-felid-n')], [Word('ewn-cat-n'), Word('ewn-true_cat-n')]]
import wn.similarity
# Path similarity between the first "bagpipe" and first "cat" synsets.
# NOTE(review): wn documents this as 1 / (1 + shortest-path distance) — confirm
# against the installed version.
wn.similarity.path(wn.synsets("bagpipe")[0], wn.synsets("cat")[0])
0.05555555555555555
%pip install gensim
Collecting gensim Downloading gensim-4.2.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (24.0 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 24.0/24.0 MB 50.0 MB/s eta 0:00:00 Requirement already satisfied: numpy>=1.17.0 in /opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages (from gensim) (1.23.2) Requirement already satisfied: scipy>=0.18.1 in /opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages (from gensim) (1.9.0) Collecting smart-open>=1.8.1 Downloading smart_open-6.1.0-py3-none-any.whl (58 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 58.6/58.6 kB 13.0 MB/s eta 0:00:00 Installing collected packages: smart-open, gensim Successfully installed gensim-4.2.0 smart-open-6.1.0 Note: you may need to restart the kernel to use updated packages.
import gensim.downloader as api
# Load the pre-trained "glove-wiki-gigaword-50" word vectors through gensim's
# downloader (the model is downloaded when it is not already available locally).
wv = api.load("glove-wiki-gigaword-50")
[==================================================] 100.0% 66.0/66.0MB downloaded
# Cosine similarity between the vectors of "cat" and "bagpipe".
wv.similarity("cat", "bagpipe")
0.24249281
1. Visualiser avec matplotlib la corrélation entre la similarité `path` dans `ewn:2020` et la similarité cosinus dans `glove-wiki-gigaword-50`, en considérant des paires de mots choisies aléatoirement dans l'intersection de leurs vocabulaires.
2. Déterminer numériquement cette corrélation, par exemple avec scipy.stats.linregress
Une analogie est une relation du type « doigt est à main ce que orteil est à pied », ce qu'on note parfois `doigt:main :: orteil:pied`. Chercher dans WordNet des analogies de ce type en utilisant la méthode `wn.Synset.relations`.