Skip to content

Commit 6927c90

Browse files
committed
Added a small tool called bin which eases the work of finding patterns, encodings, etc. in binary files
1 parent 672fa83 commit 6927c90

File tree

2 files changed

+220
-0
lines changed

2 files changed

+220
-0
lines changed

bin/bin.R

+165
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
library(ggplot2)
2+
library(reshape2)
3+
Rcpp::sourceCpp("./bin/bin.cpp")
4+
5+
# Print a vector as a single pasted string, followed by a separator.
#
# `v` is converted to character and its elements are concatenated without
# any delimiter. `sep` is handed to `cat()` as a second object to print (not
# as `cat()`'s own `sep` argument), so the output is the pasted string, a
# single space, and then `sep`.
pr <- function(v, sep) {
  joined <- paste(as.character(v), collapse = "")
  cat(joined, sep)
}
11+
12+
# R6 helper for exploring the contents of a binary file.
#
# The whole file is read into `raw` on construction. `print()` and `plot()`
# work on a window of it, which they store in `raw_subset` so the other
# methods (and the caller) can reuse the same bytes afterwards.
#
# Fields:
#   path        Path the object was created from.
#   raw         Raw vector with every byte of the file.
#   raw_subset  Raw vector with the bytes selected by the last `print()` or
#               `plot()` call (NULL until one of them has run).
bin <- R6::R6Class(
  "bin",
  public = list(
    path = NULL,
    raw = NULL,
    raw_subset = NULL,

    # Read the file at `path` completely into `self$raw`.
    initialize = function(path) {
      self$path <- path
      con <- file(path, "rb")
      # Release the connection even when reading fails.
      on.exit(close(con), add = TRUE)
      size <- file.info(path)$size
      self$raw <- readBin(con, what = "raw", n = size)
    },

    # Convert raw byte(s) to a character string via `rawToChar()`.
    to_char = function(elem) {
      rawToChar(elem)
    },

    # Convert a raw byte to its unsigned integer value via `CastToUint8()`.
    to_uint8 = function(elem) {
      rawToChar(elem) |> CastToUint8()
    },

    # Decode `vec` two bytes at a time with `CastToUint16()`.
    # Returns a list with one value per byte pair, named by the pair's
    # concatenated hex digits. Stops if `length(vec)` is not a multiple of 2.
    to_uint16 = function(vec) {
      private$cast_groups(vec, 2L, CastToUint16)
    },

    # Decode `vec` four bytes at a time with `CastToUint32()`.
    # Returns a list with one value per byte quadruple, named by the
    # quadruple's concatenated hex digits. Stops if `length(vec)` is not a
    # multiple of 4.
    to_uint32 = function(vec) {
      private$cast_groups(vec, 4L, CastToUint32)
    },

    # Print the 8 bytes of `raw_subset` starting at `idx` as characters,
    # showing "." where `to_char()` yields an empty string.
    print_char = function(idx) {
      cat(" ")
      for (i in idx:(idx + 7)) {
        temp <- self$to_char(self$raw_subset[i])
        if (temp == "") temp <- "."
        cat(temp, "\t")
      }
      cat("\n")
    },

    # Print the 8 bytes of `raw_subset` starting at `idx` as unsigned
    # 8-bit integers.
    print_uint8 = function(idx) {
      cat(" ")
      for (i in idx:(idx + 7)) {
        cat(self$to_uint8(self$raw_subset[i]), "\t")
      }
      cat("\n")
    },

    # Print the 8 bytes of `raw_subset` starting at `idx` as four uint16
    # values, each preceded by the hex digits it was decoded from.
    print_uint16 = function(idx) {
      cat(" ")
      temp <- self$to_uint16(self$raw_subset[idx:(idx + 7)])
      for (i in seq_along(temp)) {
        pr(names(temp)[i], "\t")
        cat(temp[[i]], "\t")
      }
      cat("\n")
    },

    # Print the 8 bytes of `raw_subset` starting at `idx` as two uint32
    # values, each preceded by the hex digits it was decoded from.
    print_uint32 = function(idx) {
      cat(" ")
      temp <- self$to_uint32(self$raw_subset[idx:(idx + 7)])
      for (i in seq_along(temp)) {
        pr(names(temp)[i], "\t\t")
        cat(temp[[i]], "\t")
      }
      cat("\n")
    },

    # Dump the bytes selected by `range` in rows of 8. Every complete row is
    # followed by its char, uint8, uint16 and uint32 interpretations.
    #
    # R6 dispatches printing of the object itself to this method, so calling
    # it without `range` prints a one-line summary instead of failing on a
    # missing argument. Returns the object invisibly.
    print = function(range = NULL) {
      if (is.null(range)) {
        cat("<bin> ", self$path, " (", length(self$raw), " bytes)\n", sep = "")
        return(invisible(self))
      }
      self$raw_subset <- self$raw[range]
      n <- length(self$raw_subset)
      for (i in seq_len(n)) {
        cat(self$raw_subset[i], "\t")
        if (i %% 8 == 0) {
          cat("\n")
          self$print_char(i - 7)
          self$print_uint8(i - 7)
          self$print_uint16(i - 7)
          self$print_uint32(i - 7)
          cat("\n")
          # Announce the next row only when there are bytes left to show.
          if (i < n) {
            cat("Elems: ", i + 1, " - ", min(i + 8, n), "\n")
          }
        }
      }
      # Terminate a trailing row that has fewer than 8 bytes.
      if (n %% 8 != 0) {
        cat("\n")
      }
      invisible(self)
    },

    # Bar plot of the bytes selected by `range`, decoded as `type`
    # ("uint8", "uint16" or "uint32"). `op`, when given, must be a function
    # and is applied to the decoded values before plotting. Bars alternate
    # between black and dark red; the hex digits of each value are drawn
    # rotated below the axis. Stops on an unknown `type`.
    plot = function(range, type, op = NULL) {
      self$raw_subset <- self$raw[range]
      x <- NULL
      y <- NULL
      if (type == "uint8") {
        # vapply keeps `y` an integer vector even for an empty selection.
        y <- vapply(self$raw_subset, self$to_uint8, integer(1))
        x <- as.character(self$raw_subset)
      } else if (type == "uint16") {
        res <- self$to_uint16(self$raw_subset)
        x <- names(res)
        y <- unlist(res)
        attributes(y) <- NULL
      } else if (type == "uint32") {
        res <- self$to_uint32(self$raw_subset)
        x <- names(res)
        y <- unlist(res)
        attributes(y) <- NULL
      } else {
        stop("found unknown type")
      }
      if (!is.null(op)) {
        stopifnot("op has to be a function" = is.function(op))
        y <- op(y)
      }
      colors <- rep(c("black", "darkred"), length.out = length(y))
      bp <- barplot(y, names.arg = NULL, col = colors, border = "black")
      text(
        x = bp,
        y = par("usr")[3] - 1,
        labels = x, srt = 90,
        cex = 0.75,
        adj = 1, xpd = TRUE
      )
    }
  ),
  private = list(
    # Split `vec` into consecutive groups of `width` bytes (as hex strings)
    # and apply `cast` to each group. The result is a list named by the
    # concatenated hex digits of each group.
    cast_groups = function(vec, width, cast) {
      hex <- as.character(vec)
      if (length(hex) %% width != 0L) {
        stop(
          "number of bytes (", length(hex), ") is not a multiple of ", width,
          call. = FALSE
        )
      }
      groups <- split(hex, rep(seq_len(length(hex) / width), each = width))
      res <- lapply(groups, cast)
      names(res) <- vapply(groups, paste, character(1), collapse = "")
      res
    }
  )
)
141+
142+
# Exploratory run on a ChemStation MSD1.MS file ----
path <- "./tests/Chemstation/SVS_1025F1.D/MSD1.MS"
b <- bin$new(path)
# b$print(1:320)

# Start from bytes 753..5704, keep every second index and then every second
# of those, i.e. every fourth byte beginning at index 755.
range <- 753:(753 + 4951)
range <- range[seq(1, length(range), 2)]
range <- range[seq(2, length(range), 2)]
b$plot(range, "uint16")

# Bit pattern of the same values: one row per value, one column per bit with
# the most significant bit (15) first.
uint16_values <- b$to_uint16(b$raw_subset) |> unlist()
bit_matrix <- uint16_to_bit_matrix(uint16_values)
# Name the columns before melting so the bit labels are carried into `df`.
colnames(bit_matrix) <- paste0("B_", 15:0)
df <- melt(bit_matrix)
colnames(df) <- c("Idx", "Bit_Pos", "Value")
df$Bit_Position <- as.numeric(gsub("B_", "", df$Bit_Pos))
# Plot against the numeric bit position so the reversed axis and its breaks
# refer to real bit numbers.
ggplot(df, aes(x = Bit_Position, y = Idx, fill = Value)) +
  geom_tile(width = 1, height = 5.5) +
  scale_fill_gradient(low = "white", high = "black") +
  scale_x_reverse(breaks = 15:0) +
  labs(
    x = "Bit Position", y = "Number Index",
    title = "Bit Pattern of uint16 Values"
  ) +
  theme_minimal()

bin/bin.cpp

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#include <Rcpp.h>
2+
#include <bitset>
3+
#include <cstddef>
4+
#include <cstdint>
5+
#include <fstream>
6+
#include <vector>
7+
8+
// Return the first byte of `buffer` interpreted as an unsigned 8-bit value
// (0..255). An empty string yields 0, because c_str() then points at the
// terminating null character.
// [[Rcpp::export]]
int CastToUint8(std::string &buffer) {
  const unsigned char *bytes =
      reinterpret_cast<const unsigned char *>(buffer.c_str());
  return static_cast<int>(bytes[0]);
}
14+
15+
// Parse `hex` as a base-16 number and return it as one byte.
// Parsing is done by std::stoul, so its exceptions propagate for input that
// does not start with a hex number; values above 0xFF are reduced to their
// low 8 bits by the cast.
uint8_t hexToByte(const std::string &hex) {
  const unsigned long parsed = std::stoul(hex, nullptr, 16);
  return static_cast<uint8_t>(parsed);
}
18+
19+
// [[Rcpp::export]]
20+
int CastToUint32(Rcpp::CharacterVector buffer) {
21+
if (buffer.size() != 4) {
22+
Rcpp::stop("Expected exactly 4 hex strings representing bytes.");
23+
}
24+
uint8_t byte1 = hexToByte(Rcpp::as<std::string>(buffer[0]));
25+
uint8_t byte2 = hexToByte(Rcpp::as<std::string>(buffer[1]));
26+
uint8_t byte3 = hexToByte(Rcpp::as<std::string>(buffer[2]));
27+
uint8_t byte4 = hexToByte(Rcpp::as<std::string>(buffer[3]));
28+
uint32_t res = (byte1 << 24) | (byte2 << 16) | (byte3 << 8) | byte4;
29+
return res;
30+
}
31+
32+
// [[Rcpp::export]]
33+
std::uint16_t CastToUint16(Rcpp::CharacterVector buffer) {
34+
if (buffer.size() != 2) {
35+
std::cout << "size = " << buffer.size() << std::endl;
36+
Rcpp::stop("Expected exactly 2 hex strings representing bytes.");
37+
}
38+
uint8_t byte1 = hexToByte(Rcpp::as<std::string>(buffer[0]));
39+
uint8_t byte2 = hexToByte(Rcpp::as<std::string>(buffer[1]));
40+
uint16_t res = (byte1 << 8) | byte2;
41+
return res;
42+
}
43+
44+
// [[Rcpp::export]]
45+
Rcpp::IntegerMatrix uint16_to_bit_matrix(Rcpp::IntegerVector values) {
46+
int n = values.size();
47+
Rcpp::IntegerMatrix bit_matrix(n, 16);
48+
for (int i = 0; i < n; i++) {
49+
std::bitset<16> bits(values[i]);
50+
for (int j = 0; j < 16; j++) {
51+
bit_matrix(i, j) = bits[15 - j];
52+
}
53+
}
54+
return bit_matrix;
55+
}

0 commit comments

Comments
 (0)