Create a Bigram in Python
from nltk.tokenize import word_tokenize
from nltk.util import ngrams

def get_ngrams(text, n):
    # Tokenize the text and build n-grams, joining each tuple with '_'
    # (word_tokenize needs the NLTK tokenizer data, e.g. nltk.download('punkt'))
    n_grams = ngrams(word_tokenize(text), n)
    return ['_'.join(grams) for grams in n_grams]

get_ngrams("this is a sentence", 2)
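For reference, a quick check of the function defined above (a minimal sketch, assuming the NLTK tokenizer data is already downloaded); the value of n simply controls the n-gram size:

# Bigrams: expected ['this_is', 'is_a', 'a_sentence']
print(get_ngrams("this is a sentence", 2))

# Trigrams: expected ['this_is_a', 'is_a_sentence']
print(get_ngrams("this is a sentence", 3))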