Solve challenge 4
This commit is contained in:
parent
2503961778
commit
27539840e3
4 changed files with 404 additions and 4 deletions
17
utils.c
17
utils.c
|
@@ -38,15 +38,24 @@ unsigned char *bytes_to_base64(unsigned char *out,
|
|||
}
|
||||
|
||||
double frequency_score(unsigned char *buf, size_t len) {
|
||||
static const double english_freqs[26] = {
|
||||
static const double english_freqs[27] = {
|
||||
0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, // A-G
|
||||
0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749, // H-N
|
||||
0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, // O-U
|
||||
0.00978, 0.02360, 0.00150, 0.01974, 0.00074 // V-Z
|
||||
0.00978, 0.02360, 0.00150, 0.01974, 0.00074, // V-Z
|
||||
0.19182 // space
|
||||
};
|
||||
|
||||
unsigned int counts[26] = {0};
|
||||
unsigned int counts[27] = {0};
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
if (!isprint(buf[i]) && buf[i] != '\n') {
|
||||
// If character is not printable, it's definitely not English.
|
||||
return INFINITY;
|
||||
}
|
||||
if (buf[i] == ' ') {
|
||||
counts[26]++;
|
||||
continue;
|
||||
}
|
||||
unsigned char c = tolower(buf[i]) - 'a';
|
||||
if (c < 26) {
|
||||
counts[c]++;
|
||||
|
@@ -54,7 +63,7 @@ double frequency_score(unsigned char *buf, size_t len) {
|
|||
}
|
||||
|
||||
double chi2 = 0;
|
||||
for (size_t i = 0; i < 26; ++i) {
|
||||
for (size_t i = 0; i < 27; ++i) {
|
||||
double expected = len * english_freqs[i];
|
||||
chi2 += pow(counts[i] - expected, 2) / expected;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue