benchmark.cc revision c377920e
1c377920eSShuo Chen#include "file.h"
2c377920eSShuo Chen#include "timer.h"
3c377920eSShuo Chen
4c377920eSShuo Chen#include "absl/container/flat_hash_set.h"
5c377920eSShuo Chen
6c377920eSShuo Chenint main(int argc, char* argv[])
7c377920eSShuo Chen{
8c377920eSShuo Chen  setlocale(LC_NUMERIC, "");
9c377920eSShuo Chen
10c377920eSShuo Chen  bool sequential = false;
11c377920eSShuo Chen  int buffer_size = File::kBufferSize;
12c377920eSShuo Chen  int opt;
13c377920eSShuo Chen  while ((opt = getopt(argc, argv, "b:s")) != -1)
14c377920eSShuo Chen  {
15c377920eSShuo Chen    switch (opt)
16c377920eSShuo Chen    {
17c377920eSShuo Chen      case 'b':
18c377920eSShuo Chen        buffer_size = atoi(optarg);
19c377920eSShuo Chen        break;
20c377920eSShuo Chen      case 's':
21c377920eSShuo Chen        sequential = true;
22c377920eSShuo Chen        break;
23c377920eSShuo Chen    }
24c377920eSShuo Chen  }
25c377920eSShuo Chen
26c377920eSShuo Chen  std::vector<std::unique_ptr<InputFile>> files;
27c377920eSShuo Chen  files.reserve(argc - optind);
28c377920eSShuo Chen  LOG_INFO << "Reading " << argc - optind << " files "
29c377920eSShuo Chen      << (sequential ? "sequentially" : "randomly")
30c377920eSShuo Chen      << ", buffer size " << buffer_size;
31c377920eSShuo Chen  Timer timer;
32c377920eSShuo Chen  int64_t total = 0;
33c377920eSShuo Chen  int64_t lines = 0;
34c377920eSShuo Chen  for (int i = optind; i < argc; ++i)
35c377920eSShuo Chen  {
36c377920eSShuo Chen    files.emplace_back(new InputFile(argv[i], buffer_size));
37c377920eSShuo Chen  }
38c377920eSShuo Chen
39c377920eSShuo Chen  if (sequential)
40c377920eSShuo Chen  {
41c377920eSShuo Chen    for (const auto& file : files)
42c377920eSShuo Chen    {
43c377920eSShuo Chen      Timer t;
44c377920eSShuo Chen      std::string line;
45c377920eSShuo Chen      while (file->getline(&line))
46c377920eSShuo Chen      {
47c377920eSShuo Chen        ++lines;
48c377920eSShuo Chen      }
49c377920eSShuo Chen      int64_t len = file->tell();
50c377920eSShuo Chen      LOG_DEBUG << "Done " << file->filename() << " " << t.report(len);
51c377920eSShuo Chen      total += len;
52c377920eSShuo Chen    }
53c377920eSShuo Chen  }
54c377920eSShuo Chen  else
55c377920eSShuo Chen  {
56c377920eSShuo Chen    std::string line;
57c377920eSShuo Chen    absl::flat_hash_set<InputFile*> toRemove;
58c377920eSShuo Chen    while (!files.empty())
59c377920eSShuo Chen    {
60c377920eSShuo Chen      toRemove.clear();
61c377920eSShuo Chen      // read one line from each file
62c377920eSShuo Chen      for (const auto& file : files)
63c377920eSShuo Chen      {
64c377920eSShuo Chen        if (file->getline(&line))
65c377920eSShuo Chen        {
66c377920eSShuo Chen          ++lines;
67c377920eSShuo Chen        }
68c377920eSShuo Chen        else
69c377920eSShuo Chen        {
70c377920eSShuo Chen          toRemove.insert(file.get());
71c377920eSShuo Chen        }
72c377920eSShuo Chen      }
73c377920eSShuo Chen      if (!toRemove.empty())
74c377920eSShuo Chen      {
75c377920eSShuo Chen        for (const auto* f : toRemove)
76c377920eSShuo Chen        {
77c377920eSShuo Chen          total += f->tell();
78c377920eSShuo Chen          LOG_DEBUG << "Done " << f->filename();
79c377920eSShuo Chen        }
80c377920eSShuo Chen        // std::partition?
81c377920eSShuo Chen        auto it = std::remove_if(files.begin(), files.end(),
82c377920eSShuo Chen                                 [&toRemove] (const auto& f) { return toRemove.count(f.get()) > 0; });
83c377920eSShuo Chen        assert(files.end() - it == toRemove.size());
84c377920eSShuo Chen        files.erase(it, files.end());
85c377920eSShuo Chen      }
86c377920eSShuo Chen    }
87c377920eSShuo Chen  }
88c377920eSShuo Chen
89c377920eSShuo Chen  LOG_INFO << "All done " << timer.report(total) << " "
90c377920eSShuo Chen      << muduo::Fmt("%'ld", lines) << " lines";
91c377920eSShuo Chen}
92