1c377920eSShuo Chen#include "file.h"
2da39c979SShuo Chen#include "input.h"
3c377920eSShuo Chen#include "timer.h"
4c377920eSShuo Chen
5c377920eSShuo Chen#include "absl/container/flat_hash_set.h"
6c377920eSShuo Chen
7c377920eSShuo Chenint main(int argc, char* argv[])
8c377920eSShuo Chen{
9c377920eSShuo Chen  setlocale(LC_NUMERIC, "");
10c377920eSShuo Chen
11da39c979SShuo Chen  bool combine = false;
12c377920eSShuo Chen  bool sequential = false;
136f2e1683SShuo Chen  int buffer_size = kBufferSize;
14c377920eSShuo Chen  int opt;
15da39c979SShuo Chen  while ((opt = getopt(argc, argv, "b:cs")) != -1)
16c377920eSShuo Chen  {
17c377920eSShuo Chen    switch (opt)
18c377920eSShuo Chen    {
19c377920eSShuo Chen      case 'b':
20c377920eSShuo Chen        buffer_size = atoi(optarg);
21c377920eSShuo Chen        break;
22da39c979SShuo Chen      case 'c':
23da39c979SShuo Chen        combine = true;
24da39c979SShuo Chen        break;
25c377920eSShuo Chen      case 's':
26c377920eSShuo Chen        sequential = true;
27c377920eSShuo Chen        break;
28c377920eSShuo Chen    }
29c377920eSShuo Chen  }
30c377920eSShuo Chen
31da39c979SShuo Chen  LOG_INFO << "Reading " << argc - optind << (combine ? " segment " : "") << " files "
32c377920eSShuo Chen      << (sequential ? "sequentially" : "randomly")
33c377920eSShuo Chen      << ", buffer size " << buffer_size;
34c377920eSShuo Chen  Timer timer;
35c377920eSShuo Chen  int64_t total = 0;
36c377920eSShuo Chen  int64_t lines = 0;
37da39c979SShuo Chen  int64_t count = 0;
38da39c979SShuo Chen
39da39c979SShuo Chen  if (combine)
40da39c979SShuo Chen  {
41da39c979SShuo Chen  std::vector<std::unique_ptr<SegmentInput>> inputs;
42da39c979SShuo Chen  inputs.reserve(argc - optind);
43da39c979SShuo Chen  for (int i = optind; i < argc; ++i)
44da39c979SShuo Chen  {
45da39c979SShuo Chen    inputs.emplace_back(new SegmentInput(argv[i], buffer_size));
46da39c979SShuo Chen  }
47da39c979SShuo Chen
48da39c979SShuo Chen  if (sequential)
49da39c979SShuo Chen  {
50da39c979SShuo Chen    for (const auto& input : inputs)
51da39c979SShuo Chen    {
52da39c979SShuo Chen      Timer t;
53da39c979SShuo Chen      //std::string line;
54da39c979SShuo Chen      while (input->next())
55da39c979SShuo Chen      {
56da39c979SShuo Chen        count += input->current_count();
57da39c979SShuo Chen        ++lines;
58da39c979SShuo Chen      }
59da39c979SShuo Chen      int64_t len = input->tell();
60da39c979SShuo Chen      LOG_INFO << "Done " << input->filename() << " " << t.report(len);
61da39c979SShuo Chen      total += len;
62da39c979SShuo Chen    }
63da39c979SShuo Chen  }
64da39c979SShuo Chen  else
65da39c979SShuo Chen  {
66da39c979SShuo Chen  }
67da39c979SShuo Chen  }
68da39c979SShuo Chen  else
69da39c979SShuo Chen  {
70da39c979SShuo Chen  std::vector<std::unique_ptr<InputFile>> files;
71da39c979SShuo Chen  files.reserve(argc - optind);
72c377920eSShuo Chen  for (int i = optind; i < argc; ++i)
73c377920eSShuo Chen  {
74c377920eSShuo Chen    files.emplace_back(new InputFile(argv[i], buffer_size));
75c377920eSShuo Chen  }
76c377920eSShuo Chen
77c377920eSShuo Chen  if (sequential)
78c377920eSShuo Chen  {
79c377920eSShuo Chen    for (const auto& file : files)
80c377920eSShuo Chen    {
81c377920eSShuo Chen      Timer t;
82c377920eSShuo Chen      std::string line;
83c377920eSShuo Chen      while (file->getline(&line))
84c377920eSShuo Chen      {
85c377920eSShuo Chen        ++lines;
86c377920eSShuo Chen      }
87c377920eSShuo Chen      int64_t len = file->tell();
88c377920eSShuo Chen      LOG_DEBUG << "Done " << file->filename() << " " << t.report(len);
89c377920eSShuo Chen      total += len;
90c377920eSShuo Chen    }
91c377920eSShuo Chen  }
92c377920eSShuo Chen  else
93c377920eSShuo Chen  {
94c377920eSShuo Chen    std::string line;
95c377920eSShuo Chen    absl::flat_hash_set<InputFile*> toRemove;
96c377920eSShuo Chen    while (!files.empty())
97c377920eSShuo Chen    {
98c377920eSShuo Chen      toRemove.clear();
99c377920eSShuo Chen      // read one line from each file
100c377920eSShuo Chen      for (const auto& file : files)
101c377920eSShuo Chen      {
102c377920eSShuo Chen        if (file->getline(&line))
103c377920eSShuo Chen        {
104c377920eSShuo Chen          ++lines;
105c377920eSShuo Chen        }
106c377920eSShuo Chen        else
107c377920eSShuo Chen        {
108c377920eSShuo Chen          toRemove.insert(file.get());
109c377920eSShuo Chen        }
110c377920eSShuo Chen      }
111c377920eSShuo Chen      if (!toRemove.empty())
112c377920eSShuo Chen      {
1136f2e1683SShuo Chen        for (auto* f : toRemove)
114c377920eSShuo Chen        {
115c377920eSShuo Chen          total += f->tell();
116c377920eSShuo Chen          LOG_DEBUG << "Done " << f->filename();
117c377920eSShuo Chen        }
118c377920eSShuo Chen        // std::partition?
119c377920eSShuo Chen        auto it = std::remove_if(files.begin(), files.end(),
120c377920eSShuo Chen                                 [&toRemove] (const auto& f) { return toRemove.count(f.get()) > 0; });
121c377920eSShuo Chen        assert(files.end() - it == toRemove.size());
122c377920eSShuo Chen        files.erase(it, files.end());
123c377920eSShuo Chen      }
124c377920eSShuo Chen    }
125c377920eSShuo Chen  }
126da39c979SShuo Chen  }
127c377920eSShuo Chen
128c377920eSShuo Chen  LOG_INFO << "All done " << timer.report(total) << " "
129da39c979SShuo Chen      << muduo::Fmt("%'ld", lines) << " lines "
130da39c979SShuo Chen      << muduo::Fmt("%'ld", count) << " count";
131c377920eSShuo Chen}
132