17705366eSDmitri Tikhonov#!/usr/bin/env perl
27705366eSDmitri Tikhonov#
37705366eSDmitri Tikhonov# har2qif.pl    - Convert HAR file to a QIF file.
47705366eSDmitri Tikhonov#
57705366eSDmitri Tikhonov# QIF stands for "QPACK Interop Format."  It is meant to be easy to parse,
67705366eSDmitri Tikhonov# write, and compare.  The idea is that the QIF input to an encoder can
77705366eSDmitri Tikhonov# be compared to QIF output produced by the decoder using `diff(1)':
87705366eSDmitri Tikhonov#
97705366eSDmitri Tikhonov#   sh$ har2qif -requests my.har > in.qif
107705366eSDmitri Tikhonov#   sh$ ./qpack-encoder-A in.qif > binary-format    # See wiki
117705366eSDmitri Tikhonov#   sh$ ./qpack-decoder-B binary-format > out.qif
127705366eSDmitri Tikhonov#   sh$ diff in.qif out.qif && echo "Success!"
137705366eSDmitri Tikhonov#
147705366eSDmitri Tikhonov# The QIF format is plain text.  Each header field in on a separate line,
157705366eSDmitri Tikhonov# with name and value separated by the TAB character.  An empty line
167705366eSDmitri Tikhonov# signifies the end of a header set.  Lines beginning with '#' are ignored.
177705366eSDmitri Tikhonov#
187705366eSDmitri Tikhonov# HTTP/2 header sets are left untouched, while non-HTTP/2 header sets are
197705366eSDmitri Tikhonov# transformed to resemble HTTP/2:
207705366eSDmitri Tikhonov#   - Header names are lowercased
217705366eSDmitri Tikhonov#   - `Host' header name is removed from requests
227705366eSDmitri Tikhonov#   - Requests get :method, :scheme, :authority, and :path pseudo-headers
237705366eSDmitri Tikhonov#   - Responses get :status pseudo-headers
247705366eSDmitri Tikhonov
257705366eSDmitri Tikhonovuse strict;
267705366eSDmitri Tikhonovuse warnings;
277705366eSDmitri Tikhonov
287705366eSDmitri Tikhonovuse Getopt::Long;
297705366eSDmitri Tikhonovuse JSON qw(decode_json);
307705366eSDmitri Tikhonovuse URI;
317705366eSDmitri Tikhonov
327705366eSDmitri Tikhonovmy $key = 'request';
336ab76434SDmitri Tikhonovmy ($limit, $split_cookie);
347705366eSDmitri Tikhonov
357705366eSDmitri TikhonovGetOptions( "help" => sub {
367705366eSDmitri Tikhonov    print <<USAGE;
377705366eSDmitri TikhonovUsage: $0 [options] [file.har] > file.qif
387705366eSDmitri Tikhonov
397705366eSDmitri TikhonovOptions:
407705366eSDmitri Tikhonov    -requests       Print headers from requests.  This is the default.
417705366eSDmitri Tikhonov    -responses      Print headers from responses.
426ab76434SDmitri Tikhonov    -split-cookie   Split the Cookie: header.
437705366eSDmitri Tikhonov    -limit N        Limit number of header sets to N.  The default
447705366eSDmitri Tikhonov                      is no limit.
457705366eSDmitri Tikhonov
467705366eSDmitri TikhonovIf file.har is not specified, the HAR is read from stdin
477705366eSDmitri TikhonovUSAGE
487705366eSDmitri Tikhonov
497705366eSDmitri Tikhonov    exit;
507705366eSDmitri Tikhonov},
517705366eSDmitri Tikhonov            "requests"  => sub { $key = "request", },
527705366eSDmitri Tikhonov            "responses" => sub { $key = "response", },
536ab76434SDmitri Tikhonov            "split-cookie" => \$split_cookie,
547705366eSDmitri Tikhonov            "limit=i"   => \$limit,
557705366eSDmitri Tikhonov);
567705366eSDmitri Tikhonov
577705366eSDmitri Tikhonovmy $json = decode_json( do { undef $/; <> });
587705366eSDmitri Tikhonovmy @messages = map $$_{$key}, @{ $json->{log}{entries} };
597705366eSDmitri Tikhonovif (defined($limit))
607705366eSDmitri Tikhonov{
617705366eSDmitri Tikhonov    splice @messages, $limit;
627705366eSDmitri Tikhonov}
637705366eSDmitri Tikhonov
647705366eSDmitri Tikhonovmy @header_sets = do {
657705366eSDmitri Tikhonov    if ($key eq 'request') {
667705366eSDmitri Tikhonov        map req_header_set($_), @messages
677705366eSDmitri Tikhonov    } else {
687705366eSDmitri Tikhonov        map resp_header_set($_), @messages
697705366eSDmitri Tikhonov    }
707705366eSDmitri Tikhonov};
717705366eSDmitri Tikhonov
727705366eSDmitri Tikhonovfor (@header_sets) {
73f01c2e94SDmitri Tikhonov    no warnings 'uninitialized';
747705366eSDmitri Tikhonov    print map "$$_[0]\t$$_[1]\n", @$_;
757705366eSDmitri Tikhonov    print "\n";
767705366eSDmitri Tikhonov}
777705366eSDmitri Tikhonov
787705366eSDmitri Tikhonovexit;
797705366eSDmitri Tikhonov
807705366eSDmitri Tikhonov# Looking at capitalization of the first header is a more reliable means
817705366eSDmitri Tikhonov# of determining HTTP version than relying on httpVersion field.
827705366eSDmitri Tikhonov#
837705366eSDmitri Tikhonovsub is_http2 {
847705366eSDmitri Tikhonov    my $message = shift;
85f01c2e94SDmitri Tikhonov    if (defined($$message{headers}[0])
86f01c2e94SDmitri Tikhonov                                && defined($$message{headers}[0]{name})) {
87f01c2e94SDmitri Tikhonov        return $$message{headers}[0]{name} =~ /^[a-z:]/;
88f01c2e94SDmitri Tikhonov    } elsif (defined($$message{httpVersion})) {
89f01c2e94SDmitri Tikhonov        return $$message{httpVersion} =~ m~HTTP/2~i;
90f01c2e94SDmitri Tikhonov    } else {
91f01c2e94SDmitri Tikhonov        return;
92f01c2e94SDmitri Tikhonov    }
937705366eSDmitri Tikhonov}
947705366eSDmitri Tikhonov
957705366eSDmitri Tikhonovsub req_header_set {
967705366eSDmitri Tikhonov    my $message = shift;
976ab76434SDmitri Tikhonov    my @set;
987705366eSDmitri Tikhonov    if (!is_http2($message)) {
997705366eSDmitri Tikhonov        my @headers = map [ lc($$_{name}), $$_{value}, ],
1007705366eSDmitri Tikhonov                      grep $$_{name} ne 'Host', @{ $$message{headers} };
1017705366eSDmitri Tikhonov        my $uri = URI->new($$message{url});
1026ab76434SDmitri Tikhonov        @set = (
1037705366eSDmitri Tikhonov            [ ':method',    $$message{method}, ],
1047705366eSDmitri Tikhonov            [ ':scheme',    $uri->scheme, ],
1057705366eSDmitri Tikhonov            [ ':authority', $uri->authority, ],
1067705366eSDmitri Tikhonov            [ ':path',      $uri->path_query, ],
1077705366eSDmitri Tikhonov            @headers,
1086ab76434SDmitri Tikhonov        );
1097705366eSDmitri Tikhonov    } else {
1106ab76434SDmitri Tikhonov        @set = map [ $$_{name}, $$_{value}, ], @{ $$message{headers} };
1116ab76434SDmitri Tikhonov    }
1126ab76434SDmitri Tikhonov    if ($split_cookie) {
1136ab76434SDmitri Tikhonov        return [ map {
1146ab76434SDmitri Tikhonov                    if ('cookie' eq $$_[0]) {
1156ab76434SDmitri Tikhonov                        map [ 'cookie', $_, ], split /;\s+/, $$_[1]
1166ab76434SDmitri Tikhonov                    } else {
1176ab76434SDmitri Tikhonov                        $_
1186ab76434SDmitri Tikhonov                    }
1196ab76434SDmitri Tikhonov                 } @set ];
1206ab76434SDmitri Tikhonov    } else {
1216ab76434SDmitri Tikhonov        return \@set;
1227705366eSDmitri Tikhonov    }
1237705366eSDmitri Tikhonov}
1247705366eSDmitri Tikhonov
1257705366eSDmitri Tikhonovsub resp_header_set {
1267705366eSDmitri Tikhonov    my $message = shift;
127f01c2e94SDmitri Tikhonov    no warnings 'uninitialized';
1287705366eSDmitri Tikhonov    if (!is_http2($message)) {
1297705366eSDmitri Tikhonov        my @headers = map [ lc($$_{name}), $$_{value}, ],
1307705366eSDmitri Tikhonov                                                @{ $$message{headers} };
1317705366eSDmitri Tikhonov        my $uri = URI->new($$message{url});
1327705366eSDmitri Tikhonov        return [
1337705366eSDmitri Tikhonov            [ ':status',    $$message{status}, ],
1347705366eSDmitri Tikhonov            @headers,
1357705366eSDmitri Tikhonov        ];
1367705366eSDmitri Tikhonov    } else {
1377705366eSDmitri Tikhonov        return [ map [ $$_{name}, $$_{value}, ], @{ $$message{headers} } ]
1387705366eSDmitri Tikhonov    }
1397705366eSDmitri Tikhonov}
140