Update English letter frequencies

This commit is contained in:
Dimitri Lozeve 2020-06-10 17:17:36 +02:00
parent 87c2fdc95d
commit 2f206e91db

View file

@ -1,9 +1,8 @@
#include "utils.h" #include "utils.h"
#include <ctype.h> #include <ctype.h>
#include <math.h> #include <math.h>
#include <string.h>
#include <stdio.h> #include <stdio.h>
#include <string.h>
unsigned char hex_to_byte(const char c) { unsigned char hex_to_byte(const char c) {
if (isdigit(c)) { if (isdigit(c)) {
@ -77,12 +76,20 @@ size_t base64_to_bytes(unsigned char out[static 3], const char in[static 4]) {
double frequency_score(size_t len, const char buf[static len]) { double frequency_score(size_t len, const char buf[static len]) {
static const double english_freqs[27] = { static const double english_freqs[27] = {
0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, // A-G // A-Z
0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749, // H-N 0.0855, 0.0160, 0.0316, 0.0387, 0.01210, 0.0218, 0.0209, 0.0496, 0.0733,
0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, // O-U 0.0022, 0.0081, 0.0421, 0.0253, 0.0717, 0.0747, 0.0207, 0.0010, 0.0633,
0.00978, 0.02360, 0.00150, 0.01974, 0.00074, // V-Z 0.0673, 0.0894, 0.0268, 0.0106, 0.0183, 0.0019, 0.0172, 0.0011,
0.19182 // space // space
}; 0.19182};
// static const double english_freqs[27] = {
// // A-Z
// 0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, 0.06094,
// 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749, 0.07507, 0.01929,
// 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, 0.00978, 0.02360, 0.00150,
// 0.01974, 0.00074,
// // space
// 0.19182};
unsigned int counts[27] = {0}; unsigned int counts[27] = {0};
for (size_t i = 0; i < len; ++i) { for (size_t i = 0; i < len; ++i) {