@@ -26,7 +26,6 @@ import (
26
26
"github.com/google/uuid"
27
27
"github.com/pkg/errors"
28
28
"github.com/postmanlabs/postman-insights-agent/printer"
29
- "github.com/postmanlabs/postman-insights-agent/telemetry"
30
29
"golang.org/x/text/encoding/ianaindex"
31
30
"golang.org/x/text/transform"
32
31
"gopkg.in/yaml.v2"
@@ -145,14 +144,16 @@ func ParseHTTP(elem akinet.ParsedNetworkContent) (*PartialWitness, error) {
145
144
}
146
145
147
146
if err != nil {
148
- // Just log an error instead of returning an error so users can see the
149
- // other parts of the endpoint in the spec rather than an empty spec.
150
- // https://app.clubhouse.io/akita-software/story/1898/juan-s-payload-problem
151
- telemetry . RateLimitError ( "unparsable body" , err )
152
- printer . Debugf ( "skipping unparsable body: %v \n " , err )
153
- } else if bodyData != nil {
154
- datas = append ( datas , bodyData )
147
+ // When the body is unparsable even after attempting fallback decompressions,
148
+ // we will try to capture the body as a string and indicate parsing error in the body meta
149
+ bodyStream := rawBody . CreateReader ()
150
+ // we are ignoring the error from decodeBody here
151
+ // if the decodeStream is nil, we will capture a placeholder bodyData that would say we received an unparsable body
152
+ decodeStream , _ := decodeBody ( headers , bodyStream , bodyDecompressed )
153
+ bodyData = captureUnparsableBody ( decodeStream , contentType , statusCode )
155
154
}
155
+
156
+ datas = append (datas , bodyData )
156
157
}
157
158
158
159
method := & pb.Method {Id : UnassignedHTTPID (), Meta : methodMeta }
@@ -307,32 +308,7 @@ func parseBody(contentType string, bodyStream io.Reader, statusCode int) (*pb.Da
307
308
return parseMultipartBody ("mixed" , mediaParams ["boundary" ], bodyStream , statusCode )
308
309
}
309
310
310
- // Otherwise, use media type to decide how to parse the body.
311
- // TODO: XML parsing
312
- // TODO: application/json-seq (RFC 7466)?
313
- // TODO: more text/* types
314
- var parseBodyDataAs pb.HTTPBody_ContentType
315
- switch mediaType {
316
- case "application/json" :
317
- parseBodyDataAs = pb .HTTPBody_JSON
318
- case "application/x-www-form-urlencoded" :
319
- parseBodyDataAs = pb .HTTPBody_FORM_URL_ENCODED
320
- case "application/yaml" , "application/x-yaml" , "text/yaml" , "text/x-yaml" :
321
- parseBodyDataAs = pb .HTTPBody_YAML
322
- case "application/octet-stream" :
323
- parseBodyDataAs = pb .HTTPBody_OCTET_STREAM
324
- case "text/plain" , "text/csv" :
325
- parseBodyDataAs = pb .HTTPBody_TEXT_PLAIN
326
- case "text/html" :
327
- parseBodyDataAs = pb .HTTPBody_TEXT_HTML
328
- default :
329
- // Handle custom JSON-encoded media types.
330
- if strings .HasSuffix (mediaType , "+json" ) {
331
- parseBodyDataAs = pb .HTTPBody_JSON
332
- } else {
333
- parseBodyDataAs = pb .HTTPBody_OTHER
334
- }
335
- }
311
+ parseBodyDataAs := getContentTypeFromMediaType (mediaType )
336
312
337
313
var bodyData * pb.Data
338
314
@@ -443,6 +419,73 @@ func parseBody(contentType string, bodyStream io.Reader, statusCode int) (*pb.Da
443
419
return bodyData , nil
444
420
}
445
421
422
+ // When we can't parse the body, we will try to capture it as a raw primitive string and
423
+ // indicate the parsing error in the body meta.
424
+ func captureUnparsableBody (bodyStream io.Reader , contentType string , statusCode int ) * pb.Data {
425
+ mediaType , _ , _ := mime .ParseMediaType (contentType )
426
+ bodyData := & pb.Data {
427
+ Value : newDataPrimitive (categorizeStringToPrimitive ("Cannot parse body" )),
428
+ Meta : newDataMetaHTTPMeta (& pb.HTTPMeta {
429
+ Location : & pb.HTTPMeta_Body {
430
+ Body : & pb.HTTPBody {
431
+ ContentType : getContentTypeFromMediaType (mediaType ),
432
+ OtherType : contentType ,
433
+ Errors : pb .HTTPBody_PARSING_ERROR ,
434
+ },
435
+ },
436
+ ResponseCode : int32 (statusCode ),
437
+ }),
438
+ }
439
+
440
+ if bodyStream == nil {
441
+ return bodyData
442
+ }
443
+
444
+ // Categorize the body as a string.
445
+ buf := new (strings.Builder )
446
+ _ , err := io .Copy (buf , bodyStream )
447
+ if err != nil {
448
+ return bodyData
449
+ }
450
+
451
+ bodyData .Value = newDataPrimitive (categorizeStringToPrimitive (buf .String ()))
452
+ return bodyData
453
+ }
454
+
455
+ // Gets the content type to use for parsing the body based on the media type.
456
+ // E.g. application/json -> JSON, application/x-www-form-urlencoded -> FORM_URL_ENCODED.
457
+ // Also handles the case where the media type is a custom JSON-encoded media type.
458
+ func getContentTypeFromMediaType (mediaType string ) pb.HTTPBody_ContentType {
459
+ // Use media type to decide how to parse the body.
460
+ // TODO: XML parsing
461
+ // TODO: application/json-seq (RFC 7466)?
462
+ // TODO: more text/* types
463
+ var parseBodyDataAs pb.HTTPBody_ContentType
464
+ switch mediaType {
465
+ case "application/json" :
466
+ parseBodyDataAs = pb .HTTPBody_JSON
467
+ case "application/x-www-form-urlencoded" :
468
+ parseBodyDataAs = pb .HTTPBody_FORM_URL_ENCODED
469
+ case "application/yaml" , "application/x-yaml" , "text/yaml" , "text/x-yaml" :
470
+ parseBodyDataAs = pb .HTTPBody_YAML
471
+ case "application/octet-stream" :
472
+ parseBodyDataAs = pb .HTTPBody_OCTET_STREAM
473
+ case "text/plain" , "text/csv" :
474
+ parseBodyDataAs = pb .HTTPBody_TEXT_PLAIN
475
+ case "text/html" :
476
+ parseBodyDataAs = pb .HTTPBody_TEXT_HTML
477
+ default :
478
+ // Handle custom JSON-encoded media types.
479
+ if strings .HasSuffix (mediaType , "+json" ) {
480
+ parseBodyDataAs = pb .HTTPBody_JSON
481
+ } else {
482
+ parseBodyDataAs = pb .HTTPBody_OTHER
483
+ }
484
+ }
485
+
486
+ return parseBodyDataAs
487
+ }
488
+
446
489
func parseMultipartBody (multipartType string , boundary string , bodyStream io.Reader , statusCode int ) (* pb.Data , error ) {
447
490
fields := map [string ]* pb.Data {}
448
491
r := multipart .NewReader (bodyStream , boundary )
0 commit comments