185147189SShuo Chen#pragma once 285147189SShuo Chen 385147189SShuo Chen#include <stdio.h> 46f2e1683SShuo Chen#include <fstream> 585147189SShuo Chen#include <memory> 685147189SShuo Chen#include <string> 72cf09315SShuo Chen#include "absl/strings/string_view.h" 8c377920eSShuo Chen#include "muduo/base/Logging.h" // CHECK_NOTNULL 985147189SShuo Chen 106f2e1683SShuo Chenconst int kBufferSize = 1024 * 1024; 116f2e1683SShuo Chen 1285147189SShuo Chen// Wrappers FILE* from stdio. 1385147189SShuo Chenclass File 1485147189SShuo Chen{ 1585147189SShuo Chen public: 16c377920eSShuo Chen int64_t tell() const 1785147189SShuo Chen { 1885147189SShuo Chen return ::ftell(file_); 1985147189SShuo Chen } 2085147189SShuo Chen 2185147189SShuo Chen void close() 2285147189SShuo Chen { 2385147189SShuo Chen if (file_) 2485147189SShuo Chen ::fclose(file_); 2585147189SShuo Chen file_ = nullptr; 2685147189SShuo Chen buffer_.reset(); 2785147189SShuo Chen } 2885147189SShuo Chen 29c377920eSShuo Chen const std::string& filename() const 30c377920eSShuo Chen { 31c377920eSShuo Chen return filename_; 32c377920eSShuo Chen } 33c377920eSShuo Chen 3485147189SShuo Chen // https://github.com/coreutils/coreutils/blob/master/src/ioblksize.h 3585147189SShuo Chen /* As of May 2014, 128KiB is determined to be the minimium 3685147189SShuo Chen * blksize to best minimize system call overhead. 3785147189SShuo Chen */ 3885147189SShuo Chen 39c377920eSShuo Chen protected: 4085147189SShuo Chen File(const std::string& filename, const char* mode, int bufsize=kBufferSize) 41c377920eSShuo Chen : filename_(filename), 42c377920eSShuo Chen file_(CHECK_NOTNULL(::fopen(filename.c_str(), mode))), 4385147189SShuo Chen buffer_(CHECK_NOTNULL(new char[bufsize])) 4485147189SShuo Chen { 4585147189SShuo Chen ::setbuffer(file_, buffer_.get(), bufsize); 4685147189SShuo Chen } 4785147189SShuo Chen 4885147189SShuo Chen virtual ~File() 4985147189SShuo Chen { 5085147189SShuo Chen close(); 5185147189SShuo Chen } 5285147189SShuo Chen 5385147189SShuo Chen protected: 54c377920eSShuo Chen std::string filename_; 5585147189SShuo Chen FILE* file_ = nullptr; 5685147189SShuo Chen 5785147189SShuo Chen private: 5885147189SShuo Chen std::unique_ptr<char[]> buffer_; 5985147189SShuo Chen 6085147189SShuo Chen File(const File&) = delete; 6185147189SShuo Chen void operator=(const File&) = delete; 6285147189SShuo Chen}; 6385147189SShuo Chen 6485147189SShuo Chenclass InputFile : public File 6585147189SShuo Chen{ 6685147189SShuo Chen public: 6785147189SShuo Chen explicit InputFile(const char* filename, int bufsize=kBufferSize) 6885147189SShuo Chen : File(filename, "r", bufsize) 6985147189SShuo Chen { 7085147189SShuo Chen } 7185147189SShuo Chen 7285147189SShuo Chen bool getline(std::string* output) 7385147189SShuo Chen { 746f2e1683SShuo Chen char buf[1024]; // ="" will slow down by 50%!!! 7585147189SShuo Chen if (::fgets(buf, sizeof buf, file_)) 7685147189SShuo Chen { 7785147189SShuo Chen *output = buf; 7885147189SShuo Chen if (!output->empty() && output->back() == '\n') 7985147189SShuo Chen { 8085147189SShuo Chen output->resize(output->size()-1); 8185147189SShuo Chen } 8285147189SShuo Chen return true; 8385147189SShuo Chen } 8485147189SShuo Chen return false; 8585147189SShuo Chen } 8685147189SShuo Chen}; 8785147189SShuo Chen 88da39c979SShuo Chen/* 896f2e1683SShuo Chenclass InputFile2 906f2e1683SShuo Chen{ 916f2e1683SShuo Chen public: 926f2e1683SShuo Chen explicit InputFile2(const char* filename, int bufsize=kBufferSize) 936f2e1683SShuo Chen : filename_(filename), 946f2e1683SShuo Chen in_(filename) 956f2e1683SShuo Chen { 966f2e1683SShuo Chen // FIXME: bufsize 976f2e1683SShuo Chen } 986f2e1683SShuo Chen 996f2e1683SShuo Chen bool getline(std::string* output) 1006f2e1683SShuo Chen { 1016f2e1683SShuo Chen return static_cast<bool>(std::getline(in_, *output)); 1026f2e1683SShuo Chen } 1036f2e1683SShuo Chen 1046f2e1683SShuo Chen const std::string& filename() const 1056f2e1683SShuo Chen { 1066f2e1683SShuo Chen return filename_; 1076f2e1683SShuo Chen } 1086f2e1683SShuo Chen 1096f2e1683SShuo Chen private: 1106f2e1683SShuo Chen std::string filename_; 1116f2e1683SShuo Chen std::ifstream in_; 1126f2e1683SShuo Chen}; 113da39c979SShuo Chen*/ 1146f2e1683SShuo Chen 11585147189SShuo Chenclass OutputFile : public File 11685147189SShuo Chen{ 11785147189SShuo Chen public: 11885147189SShuo Chen explicit OutputFile(const std::string& filename) 11985147189SShuo Chen : File(filename, "w") 12085147189SShuo Chen { 12185147189SShuo Chen } 12285147189SShuo Chen 1232cf09315SShuo Chen void write(absl::string_view s) 12485147189SShuo Chen { 12585147189SShuo Chen ::fwrite(s.data(), 1, s.size(), file_); 12685147189SShuo Chen } 12785147189SShuo Chen 128da39c979SShuo Chen void writeWord(int64_t count, absl::string_view word) 129da39c979SShuo Chen { 130da39c979SShuo Chen ::fprintf(file_, "%ld\t", count); 131da39c979SShuo Chen ::fwrite(word.data(), 1, word.size(), file_); 132da39c979SShuo Chen ::fwrite("\n", 1, 1, file_); 133da39c979SShuo Chen } 134da39c979SShuo Chen 1352cf09315SShuo Chen void appendRecord(absl::string_view s) 13685147189SShuo Chen { 13785147189SShuo Chen assert(s.size() < 255); 13885147189SShuo Chen uint8_t len = s.size(); 13985147189SShuo Chen ::fwrite(&len, 1, sizeof len, file_); 14085147189SShuo Chen ::fwrite(s.data(), 1, len, file_); 14185147189SShuo Chen ++items_; 14285147189SShuo Chen } 14385147189SShuo Chen 14485147189SShuo Chen size_t items() 14585147189SShuo Chen { 14685147189SShuo Chen return items_; 14785147189SShuo Chen } 14885147189SShuo Chen 14985147189SShuo Chen private: 15085147189SShuo Chen size_t items_ = 0; 15185147189SShuo Chen}; 15285147189SShuo Chen 153