1#!/usr/bin/env perl 2# 3# har2qif.pl - Convert HAR file to a QIF file. 4# 5# QIF stands for "QPACK Interop Format." It is meant to be easy to parse, 6# write, and compare. The idea is that the QIF input to an encoder can 7# be compared to QIF output produced by the decoder using `diff(1)': 8# 9# sh$ har2qif -requests my.har > in.qif 10# sh$ ./qpack-encoder-A in.qif > binary-format # See wiki 11# sh$ ./qpack-decoder-B binary-format > out.qif 12# sh$ diff in.qif out.qif && echo "Success!" 13# 14# The QIF format is plain text. Each header field in on a separate line, 15# with name and value separated by the TAB character. An empty line 16# signifies the end of a header set. Lines beginning with '#' are ignored. 17# 18# HTTP/2 header sets are left untouched, while non-HTTP/2 header sets are 19# transformed to resemble HTTP/2: 20# - Header names are lowercased 21# - `Host' header name is removed from requests 22# - Requests get :method, :scheme, :authority, and :path pseudo-headers 23# - Responses get :status pseudo-headers 24 25use strict; 26use warnings; 27 28use Getopt::Long; 29use JSON qw(decode_json); 30use URI; 31 32my $key = 'request'; 33my ($limit, $split_cookie); 34 35GetOptions( "help" => sub { 36 print <<USAGE; 37Usage: $0 [options] [file.har] > file.qif 38 39Options: 40 -requests Print headers from requests. This is the default. 41 -responses Print headers from responses. 42 -split-cookie Split the Cookie: header. 43 -limit N Limit number of header sets to N. The default 44 is no limit. 45 46If file.har is not specified, the HAR is read from stdin 47USAGE 48 49 exit; 50}, 51 "requests" => sub { $key = "request", }, 52 "responses" => sub { $key = "response", }, 53 "split-cookie" => \$split_cookie, 54 "limit=i" => \$limit, 55); 56 57my $json = decode_json( do { undef $/; <> }); 58my @messages = map $$_{$key}, @{ $json->{log}{entries} }; 59if (defined($limit)) 60{ 61 splice @messages, $limit; 62} 63 64my @header_sets = do { 65 if ($key eq 'request') { 66 map req_header_set($_), @messages 67 } else { 68 map resp_header_set($_), @messages 69 } 70}; 71 72for (@header_sets) { 73 no warnings 'uninitialized'; 74 print map "$$_[0]\t$$_[1]\n", @$_; 75 print "\n"; 76} 77 78exit; 79 80# Looking at capitalization of the first header is a more reliable means 81# of determining HTTP version than relying on httpVersion field. 82# 83sub is_http2 { 84 my $message = shift; 85 if (defined($$message{headers}[0]) 86 && defined($$message{headers}[0]{name})) { 87 return $$message{headers}[0]{name} =~ /^[a-z:]/; 88 } elsif (defined($$message{httpVersion})) { 89 return $$message{httpVersion} =~ m~HTTP/2~i; 90 } else { 91 return; 92 } 93} 94 95sub req_header_set { 96 my $message = shift; 97 my @set; 98 if (!is_http2($message)) { 99 my @headers = map [ lc($$_{name}), $$_{value}, ], 100 grep $$_{name} ne 'Host', @{ $$message{headers} }; 101 my $uri = URI->new($$message{url}); 102 @set = ( 103 [ ':method', $$message{method}, ], 104 [ ':scheme', $uri->scheme, ], 105 [ ':authority', $uri->authority, ], 106 [ ':path', $uri->path_query, ], 107 @headers, 108 ); 109 } else { 110 @set = map [ $$_{name}, $$_{value}, ], @{ $$message{headers} }; 111 } 112 if ($split_cookie) { 113 return [ map { 114 if ('cookie' eq $$_[0]) { 115 map [ 'cookie', $_, ], split /;\s+/, $$_[1] 116 } else { 117 $_ 118 } 119 } @set ]; 120 } else { 121 return \@set; 122 } 123} 124 125sub resp_header_set { 126 my $message = shift; 127 no warnings 'uninitialized'; 128 if (!is_http2($message)) { 129 my @headers = map [ lc($$_{name}), $$_{value}, ], 130 @{ $$message{headers} }; 131 my $uri = URI->new($$message{url}); 132 return [ 133 [ ':status', $$message{status}, ], 134 @headers, 135 ]; 136 } else { 137 return [ map [ $$_{name}, $$_{value}, ], @{ $$message{headers} } ] 138 } 139} 140