Skip to content

Commit d4e14f9

Browse files
committed
Implement entity_id. Issue #155
This implements the 12-byte entity id as defined in https://www.elastic.co/docs/reference/ecs/ecs-process#field-process-entity-id. We can't depend on dynamic linking of md or openssl, so include a standalone MIT-licensed sha256 implementation from https://github.com/ilvn/SHA256; they claim to be formally verified, so that's something. We now also have to link against resolv so we can get the base64 functions; that's ok, beats already links against it. Musl doesn't have b64_ntop, so we include a replacement guarded by NO_B64. This is a WIP, as I want to make sure we compute the very same entity_id as gosysinfo and friends.
1 parent 17f9c73 commit d4e14f9

File tree

7 files changed

+806
-2
lines changed

7 files changed

+806
-2
lines changed

Makefile

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ CPPFLAGS?= -D_GNU_SOURCE
4343
ifndef SYSLIB
4444
CPPFLAGS+= -Iinclude/usr/include
4545
endif
46+
ifdef NO_B64
47+
CPPFLAGS+= -DNO_B64
48+
endif
4649

4750
CDIAGFLAGS+= -Wall
4851
CDIAGFLAGS+= -Wextra
@@ -90,13 +93,15 @@ LIBQUARK_DEPS+= $(EEBPF_FILES) include
9093
endif
9194
LIBQUARK_DEPS:= $(filter-out manpages.h, $(LIBQUARK_DEPS))
9295
LIBQUARK_SRCS:= \
96+
base64.c \
9397
bpf_queue.c \
9498
btf.c \
9599
btfhub.c \
96100
compat.c \
97101
kprobe_queue.c \
98102
quark.c \
99-
qutil.c
103+
qutil.c \
104+
sha256.c
100105
LIBQUARK_OBJS:= $(patsubst %.c,%.o,$(LIBQUARK_SRCS))
101106
LIBQUARK_STATIC:= libquark.a
102107
LIBQUARK_STATIC_BIG:= libquark_big.a
@@ -109,6 +114,11 @@ LIBQUARK_TARGET=$(LIBQUARK_STATIC)
109114
EXTRA_LDFLAGS+= -lbpf
110115
endif
111116

117+
# for b64_ntop()
118+
ifndef NO_B64
119+
EXTRA_LDFLAGS+= -lresolv
120+
endif
121+
112122
# ZLIB
113123
ZLIB_SRC:= zlib
114124
ZLIB_FILES:= $(shell find $(ZLIB_SRC) \(\
@@ -287,7 +297,7 @@ alpine: alpine-image clean-all
287297
$(call msg,ALPINE-DOCKER-RUN,Dockerfile)
288298
$(Q)$(DOCKER) run \
289299
$(ALPINE_RUN_ARGS) $(SHELL) \
290-
-c "make -C $(PWD) all initramfs.gz EXTRA_LDFLAGS=-lfts"
300+
-c "make -C $(PWD) all initramfs.gz EXTRA_LDFLAGS=-lfts NO_B64=y"
291301

292302
alpine-image: clean-all
293303
$(call msg,ALPINE-IMAGE,Dockerfile.alpine)

base64.c

Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
/* $OpenBSD: base64.c,v 1.5 2006/10/21 09:55:03 otto Exp $ */
2+
3+
/*
4+
* Copyright (c) 1996 by Internet Software Consortium.
5+
*
6+
* Permission to use, copy, modify, and distribute this software for any
7+
* purpose with or without fee is hereby granted, provided that the above
8+
* copyright notice and this permission notice appear in all copies.
9+
*
10+
* THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
11+
* ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
12+
* OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
13+
* CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14+
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15+
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
16+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
17+
* SOFTWARE.
18+
*/
19+
20+
/*
21+
* Portions Copyright (c) 1995 by International Business Machines, Inc.
22+
*
23+
* International Business Machines, Inc. (hereinafter called IBM) grants
24+
* permission under its copyrights to use, copy, modify, and distribute this
25+
* Software with or without fee, provided that the above copyright notice and
26+
* all paragraphs of this notice appear in all copies, and that the name of IBM
27+
* not be used in connection with the marketing of any product incorporating
28+
* the Software or modifications thereof, without specific, written prior
29+
* permission.
30+
*
31+
* To the extent it has a right to do so, IBM grants an immunity from suit
32+
* under its patents, if any, for the use, sale or manufacture of products to
33+
* the extent that such products are used for performing Domain Name System
34+
* dynamic updates in TCP/IP networks by means of the Software. No immunity is
35+
* granted for any product per se or for any other function of any product.
36+
*
37+
* THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
38+
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39+
* PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
40+
* DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
41+
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
42+
* IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
43+
*/
44+
45+
/* OPENBSD ORIGINAL: lib/libc/net/base64.c */
46+
47+
#ifdef NO_B64
48+
49+
#include <sys/types.h>
50+
#include <sys/socket.h>
51+
#include <netinet/in.h>
52+
#include <arpa/inet.h>
53+
54+
#include <ctype.h>
55+
#include <stdio.h>
56+
57+
#include <stdlib.h>
58+
#include <string.h>
59+
60+
#include "compat.h"
61+
62+
static const char Base64[] =
63+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
64+
static const char Pad64 = '=';
65+
66+
/* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
67+
The following encoding technique is taken from RFC 1521 by Borenstein
68+
and Freed. It is reproduced here in a slightly edited form for
69+
convenience.
70+
71+
A 65-character subset of US-ASCII is used, enabling 6 bits to be
72+
represented per printable character. (The extra 65th character, "=",
73+
is used to signify a special processing function.)
74+
75+
The encoding process represents 24-bit groups of input bits as output
76+
strings of 4 encoded characters. Proceeding from left to right, a
77+
24-bit input group is formed by concatenating 3 8-bit input groups.
78+
These 24 bits are then treated as 4 concatenated 6-bit groups, each
79+
of which is translated into a single digit in the base64 alphabet.
80+
81+
Each 6-bit group is used as an index into an array of 64 printable
82+
characters. The character referenced by the index is placed in the
83+
output string.
84+
85+
Table 1: The Base64 Alphabet
86+
87+
Value Encoding Value Encoding Value Encoding Value Encoding
88+
0 A 17 R 34 i 51 z
89+
1 B 18 S 35 j 52 0
90+
2 C 19 T 36 k 53 1
91+
3 D 20 U 37 l 54 2
92+
4 E 21 V 38 m 55 3
93+
5 F 22 W 39 n 56 4
94+
6 G 23 X 40 o 57 5
95+
7 H 24 Y 41 p 58 6
96+
8 I 25 Z 42 q 59 7
97+
9 J 26 a 43 r 60 8
98+
10 K 27 b 44 s 61 9
99+
11 L 28 c 45 t 62 +
100+
12 M 29 d 46 u 63 /
101+
13 N 30 e 47 v
102+
14 O 31 f 48 w (pad) =
103+
15 P 32 g 49 x
104+
16 Q 33 h 50 y
105+
106+
Special processing is performed if fewer than 24 bits are available
107+
at the end of the data being encoded. A full encoding quantum is
108+
always completed at the end of a quantity. When fewer than 24 input
109+
bits are available in an input group, zero bits are added (on the
110+
right) to form an integral number of 6-bit groups. Padding at the
111+
end of the data is performed using the '=' character.
112+
113+
Since all base64 input is an integral number of octets, only the
114+
-------------------------------------------------
115+
following cases can arise:
116+
117+
(1) the final quantum of encoding input is an integral
118+
multiple of 24 bits; here, the final unit of encoded
119+
output will be an integral multiple of 4 characters
120+
with no "=" padding,
121+
(2) the final quantum of encoding input is exactly 8 bits;
122+
here, the final unit of encoded output will be two
123+
characters followed by two "=" padding characters, or
124+
(3) the final quantum of encoding input is exactly 16 bits;
125+
here, the final unit of encoded output will be three
126+
characters followed by one "=" padding character.
127+
*/
128+
129+
int
130+
b64_ntop(u_char const *src, size_t srclength, char *target, size_t targsize)
131+
{
132+
size_t datalength = 0;
133+
u_char input[3];
134+
u_char output[4];
135+
u_int i;
136+
137+
while (2 < srclength) {
138+
input[0] = *src++;
139+
input[1] = *src++;
140+
input[2] = *src++;
141+
srclength -= 3;
142+
143+
output[0] = input[0] >> 2;
144+
output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
145+
output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
146+
output[3] = input[2] & 0x3f;
147+
148+
if (datalength + 4 > targsize)
149+
return (-1);
150+
target[datalength++] = Base64[output[0]];
151+
target[datalength++] = Base64[output[1]];
152+
target[datalength++] = Base64[output[2]];
153+
target[datalength++] = Base64[output[3]];
154+
}
155+
156+
/* Now we worry about padding. */
157+
if (0 != srclength) {
158+
/* Get what's left. */
159+
input[0] = input[1] = input[2] = '\0';
160+
for (i = 0; i < srclength; i++)
161+
input[i] = *src++;
162+
163+
output[0] = input[0] >> 2;
164+
output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
165+
output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
166+
167+
if (datalength + 4 > targsize)
168+
return (-1);
169+
target[datalength++] = Base64[output[0]];
170+
target[datalength++] = Base64[output[1]];
171+
if (srclength == 1)
172+
target[datalength++] = Pad64;
173+
else
174+
target[datalength++] = Base64[output[2]];
175+
target[datalength++] = Pad64;
176+
}
177+
if (datalength >= targsize)
178+
return (-1);
179+
target[datalength] = '\0'; /* Returned value doesn't count \0. */
180+
return (datalength);
181+
}
182+
183+
/* skips all whitespace anywhere.
184+
converts characters, four at a time, starting at (or after)
185+
src from base - 64 numbers into three 8 bit bytes in the target area.
186+
it returns the number of data bytes stored at the target, or -1 on error.
187+
*/
188+
189+
int
190+
b64_pton(char const *src, u_char *target, size_t targsize)
191+
{
192+
u_int tarindex, state;
193+
int ch;
194+
char *pos;
195+
196+
state = 0;
197+
tarindex = 0;
198+
199+
while ((ch = *src++) != '\0') {
200+
if (isspace(ch)) /* Skip whitespace anywhere. */
201+
continue;
202+
203+
if (ch == Pad64)
204+
break;
205+
206+
pos = strchr(Base64, ch);
207+
if (pos == 0) /* A non-base64 character. */
208+
return (-1);
209+
210+
switch (state) {
211+
case 0:
212+
if (target) {
213+
if (tarindex >= targsize)
214+
return (-1);
215+
target[tarindex] = (pos - Base64) << 2;
216+
}
217+
state = 1;
218+
break;
219+
case 1:
220+
if (target) {
221+
if (tarindex + 1 >= targsize)
222+
return (-1);
223+
target[tarindex] |= (pos - Base64) >> 4;
224+
target[tarindex+1] = ((pos - Base64) & 0x0f)
225+
<< 4 ;
226+
}
227+
tarindex++;
228+
state = 2;
229+
break;
230+
case 2:
231+
if (target) {
232+
if (tarindex + 1 >= targsize)
233+
return (-1);
234+
target[tarindex] |= (pos - Base64) >> 2;
235+
target[tarindex+1] = ((pos - Base64) & 0x03)
236+
<< 6;
237+
}
238+
tarindex++;
239+
state = 3;
240+
break;
241+
case 3:
242+
if (target) {
243+
if (tarindex >= targsize)
244+
return (-1);
245+
target[tarindex] |= (pos - Base64);
246+
}
247+
tarindex++;
248+
state = 0;
249+
break;
250+
}
251+
}
252+
253+
/*
254+
* We are done decoding Base-64 chars. Let's see if we ended
255+
* on a byte boundary, and/or with erroneous trailing characters.
256+
*/
257+
258+
if (ch == Pad64) { /* We got a pad char. */
259+
ch = *src++; /* Skip it, get next. */
260+
switch (state) {
261+
case 0: /* Invalid = in first position */
262+
case 1: /* Invalid = in second position */
263+
return (-1);
264+
265+
case 2: /* Valid, means one byte of info */
266+
/* Skip any number of spaces. */
267+
for (; ch != '\0'; ch = *src++)
268+
if (!isspace(ch))
269+
break;
270+
/* Make sure there is another trailing = sign. */
271+
if (ch != Pad64)
272+
return (-1);
273+
ch = *src++; /* Skip the = */
274+
/* Fall through to "single trailing =" case. */
275+
/* FALLTHROUGH */
276+
277+
case 3: /* Valid, means two bytes of info */
278+
/*
279+
* We know this char is an =. Is there anything but
280+
* whitespace after it?
281+
*/
282+
for (; ch != '\0'; ch = *src++)
283+
if (!isspace(ch))
284+
return (-1);
285+
286+
/*
287+
* Now make sure for cases 2 and 3 that the "extra"
288+
* bits that slopped past the last full byte were
289+
* zeros. If we don't check them, they become a
290+
* subliminal channel.
291+
*/
292+
if (target && target[tarindex] != 0)
293+
return (-1);
294+
}
295+
} else {
296+
/*
297+
* We ended by seeing the end of the string. Make sure we
298+
* have no partial bytes lying around.
299+
*/
300+
if (state != 0)
301+
return (-1);
302+
}
303+
304+
return (tarindex);
305+
}
306+
307+
#endif /* NO_B64 */

compat.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,21 @@ long long strtonum(const char *, long long, long long, const char **);
7070
*/
7171
void sshbuf_dump_data(const void *, size_t, FILE *);
7272

73+
/*
74+
* Musl
75+
*/
76+
#ifdef NO_B64
77+
int b64_ntop(u_char const *src, size_t srclength, char *target,
78+
size_t targsize);
79+
int b64_pton(char const *src, u_char *target, size_t targsize);
80+
#else
81+
#include <resolv.h>
82+
#endif /* NO_B64 */
83+
84+
/*
85+
* Linking with the one billion versions of openssl is a pain, we need sha256
86+
* for a dozen bytes only.
87+
*/
88+
#include "sha256.h"
89+
7390
#endif /* _COMPAT_H */

0 commit comments

Comments
 (0)