1#!/usr/bin/env perl
2#
3# har2qif.pl    - Convert HAR file to a QIF file.
4#
5# QIF stands for "QPACK Interop Format."  It is meant to be easy to parse,
6# write, and compare.  The idea is that the QIF input to an encoder can
7# be compared to QIF output produced by the decoder using `diff(1)':
8#
9#   sh$ har2qif -requests my.har > in.qif
10#   sh$ ./qpack-encoder-A in.qif > binary-format    # See wiki
11#   sh$ ./qpack-decoder-B binary-format > out.qif
12#   sh$ diff in.qif out.qif && echo "Success!"
13#
14# The QIF format is plain text.  Each header field in on a separate line,
15# with name and value separated by the TAB character.  An empty line
16# signifies the end of a header set.  Lines beginning with '#' are ignored.
17#
18# HTTP/2 header sets are left untouched, while non-HTTP/2 header sets are
19# transformed to resemble HTTP/2:
20#   - Header names are lowercased
21#   - `Host' header name is removed from requests
22#   - Requests get :method, :scheme, :authority, and :path pseudo-headers
23#   - Responses get :status pseudo-headers
24
25use strict;
26use warnings;
27
28use Getopt::Long;
29use JSON qw(decode_json);
30use URI;
31
32my $key = 'request';
33my ($limit, $split_cookie);
34
35GetOptions( "help" => sub {
36    print <<USAGE;
37Usage: $0 [options] [file.har] > file.qif
38
39Options:
40    -requests       Print headers from requests.  This is the default.
41    -responses      Print headers from responses.
42    -split-cookie   Split the Cookie: header.
43    -limit N        Limit number of header sets to N.  The default
44                      is no limit.
45
46If file.har is not specified, the HAR is read from stdin
47USAGE
48
49    exit;
50},
51            "requests"  => sub { $key = "request", },
52            "responses" => sub { $key = "response", },
53            "split-cookie" => \$split_cookie,
54            "limit=i"   => \$limit,
55);
56
57my $json = decode_json( do { undef $/; <> });
58my @messages = map $$_{$key}, @{ $json->{log}{entries} };
59if (defined($limit))
60{
61    splice @messages, $limit;
62}
63
64my @header_sets = do {
65    if ($key eq 'request') {
66        map req_header_set($_), @messages
67    } else {
68        map resp_header_set($_), @messages
69    }
70};
71
72for (@header_sets) {
73    no warnings 'uninitialized';
74    print map "$$_[0]\t$$_[1]\n", @$_;
75    print "\n";
76}
77
78exit;
79
80# Looking at capitalization of the first header is a more reliable means
81# of determining HTTP version than relying on httpVersion field.
82#
83sub is_http2 {
84    my $message = shift;
85    if (defined($$message{headers}[0])
86                                && defined($$message{headers}[0]{name})) {
87        return $$message{headers}[0]{name} =~ /^[a-z:]/;
88    } elsif (defined($$message{httpVersion})) {
89        return $$message{httpVersion} =~ m~HTTP/2~i;
90    } else {
91        return;
92    }
93}
94
95sub req_header_set {
96    my $message = shift;
97    my @set;
98    if (!is_http2($message)) {
99        my @headers = map [ lc($$_{name}), $$_{value}, ],
100                      grep $$_{name} ne 'Host', @{ $$message{headers} };
101        my $uri = URI->new($$message{url});
102        @set = (
103            [ ':method',    $$message{method}, ],
104            [ ':scheme',    $uri->scheme, ],
105            [ ':authority', $uri->authority, ],
106            [ ':path',      $uri->path_query, ],
107            @headers,
108        );
109    } else {
110        @set = map [ $$_{name}, $$_{value}, ], @{ $$message{headers} };
111    }
112    if ($split_cookie) {
113        return [ map {
114                    if ('cookie' eq $$_[0]) {
115                        map [ 'cookie', $_, ], split /;\s+/, $$_[1]
116                    } else {
117                        $_
118                    }
119                 } @set ];
120    } else {
121        return \@set;
122    }
123}
124
125sub resp_header_set {
126    my $message = shift;
127    no warnings 'uninitialized';
128    if (!is_http2($message)) {
129        my @headers = map [ lc($$_{name}), $$_{value}, ],
130                                                @{ $$message{headers} };
131        my $uri = URI->new($$message{url});
132        return [
133            [ ':status',    $$message{status}, ],
134            @headers,
135        ];
136    } else {
137        return [ map [ $$_{name}, $$_{value}, ], @{ $$message{headers} } ]
138    }
139}
140