summaryrefslogtreecommitdiff
path: root/src/entropy.c
blob: a70789aa03d049675d7687d437bfde3a8f888c7b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

#include <stdint.h>
#include <stddef.h>
#include <string.h>

#include "entropy.h"

// Base-2 logarithm implemented without libm, so nothing here requires -lm.
//
// The IEEE 754 binary64 exponent field supplies the integer part of the
// result. The mantissa is range-reduced to roughly [sqrt(1/2), sqrt(2)] and
// its logarithm is evaluated with the series
//     ln(m) = 2 * (s + s^3/3 + s^5/5 + ...),   s = (m - 1) / (m + 1).
// With |s| <= 0.172 the terms through s^21 leave a truncation error below
// double rounding precision. Exact powers of two yield an exact integer.
//
// Returns -1e9 as a "minus infinity" sentinel for x <= 0, and returns x
// unchanged for +infinity and NaN.
static inline double entropy_log2(double x) {
	if (x <= 0.0) {
		return -1e9;
	}

	union { double d; uint64_t i; } vx = { x };
	int biased = (int)((vx.i >> 52) & 0x7FF);
	if (biased == 0x7FF) {
		// +inf or NaN: there is no finite logarithm, propagate the input.
		return x;
	}

	int exponent = -1023;
	if (biased == 0) {
		// Subnormal: there is no implicit leading 1, so scale by 2^52 into
		// the normal range and compensate in the exponent.
		vx.d = x * 4503599627370496.0;
		biased = (int)((vx.i >> 52) & 0x7FF);
		exponent -= 52;
	}
	exponent += biased;

	// Overwrite the exponent field with the bias so that m lies in [1, 2).
	vx.i &= ((1ULL << 52) - 1);
	vx.i |= 0x3FF0000000000000ULL;
	double m = vx.d;

	// Re-centre the mantissa around 1 to keep |s| small and the series short.
	if (m > 1.4142135623730951) {
		m *= 0.5;
		exponent += 1;
	}

	double s = (m - 1.0) / (m + 1.0);
	double s2 = s * s;

	// Horner evaluation of 1 + s^2/3 + s^4/5 + ... + s^20/21.
	double poly = 0.0;
	for (int k = 21; k >= 1; k -= 2) {
		poly = poly * s2 + 1.0 / (double)k;
	}

	// 2 / ln(2): converts 2 * (s + s^3/3 + ...) = ln(m) into log2(m).
	return (double)exponent + 2.8853900817779268 * s * poly;
}

// Reset an entropy context: every byte of *ctx is set to zero, which clears
// any previously accumulated counts so a new measurement can start.
void entropy_init(struct entropy_ctx *ctx) {
	memset(ctx, 0, sizeof *ctx);
}

// Feed `len` bytes from `buf` into the context.
//
// Each byte value increments its slot in ctx->freq, and ctx->total_bytes
// grows by `len`. May be called repeatedly to process data in pieces.
// `buf` is not dereferenced when `len` is 0.
void entropy_update(struct entropy_ctx *ctx, const unsigned char *buf, size_t len) {
	size_t i = 0;

	// Main loop, unrolled four bytes per iteration. i never exceeds len, so
	// `len - i` cannot wrap around.
	for (; len - i >= 4; i += 4) {
		ctx->freq[buf[i]]++;
		ctx->freq[buf[i + 1]]++;
		ctx->freq[buf[i + 2]]++;
		ctx->freq[buf[i + 3]]++;
	}

	// Process the remaining 0-3 bytes.
	for (; i < len; i++) {
		ctx->freq[buf[i]]++;
	}

	ctx->total_bytes += len;
}

// Compute the entropy of the bytes counted so far.
//
// For each of the 256 byte values with a non-zero count, p is the count
// divided by ctx->total_bytes, and the result is the sum of -p * log2(p),
// with log2 evaluated by the file-local entropy_log2().
// Returns 0.0 when no bytes have been counted. The context is only read.
double entropy_final(struct entropy_ctx *ctx) {
	double bits = 0.0;

	if (ctx->total_bytes != 0) {
		const double total = (double)ctx->total_bytes;

		for (int sym = 0; sym < 256; sym++) {
			// Byte values that never occurred contribute nothing.
			if (ctx->freq[sym] != 0) {
				const double prob = (double)ctx->freq[sym] / total;
				bits -= prob * entropy_log2(prob);
			}
		}
	}

	return bits;
}