| import kenlm | |
| from text_normalizer import normalize | |
def document_perplexity(model, text):
    """Return the per-token perplexity of ``text`` under ``model``.

    The input is first passed through ``normalize``. The normalized string
    is scored with ``model.score``; the result is used as a base-10
    exponent, i.e. it is treated as a log10 score. The score is averaged
    over the whitespace-separated tokens of the normalized text.

    Args:
        model: Object exposing ``score(str)``; the script passes a
            ``kenlm.Model``.
        text: Raw document text.

    Returns:
        ``10 ** (-score / token_count)``.

    Raises:
        ZeroDivisionError: If the normalized text contains no
            whitespace-separated tokens.
    """
    normalized = normalize(text)
    log10_score = model.score(normalized)
    # Tokens are counted on the normalized text, the same string that was scored.
    token_count = len(normalized.split())
    mean_neg_log10 = -log10_score / token_count
    return 10 ** mean_neg_log10
# Load the binary KenLM model; the path is relative to the working directory.
model = kenlm.Model("../lm-v2.binary")

# Sample document used as a smoke test for document_perplexity.
TEXT = (
    "I thought I’d add a little bit of background. "
    "The previous discussion started from the result "
    "$P(B|AC) = K^{-1}P(B|C)P(A|BC) = K^{-1} P(AB|C)$ "
    "where $K=P(A|C).$ "
    "Although this is called Bayes’ theorem, the general form of it as stated "
    "here was actually first written down, not by Bayes but by Laplace."
)

# Expected output is roughly 239.
print(document_perplexity(model=model, text=TEXT))