env_posix.cc 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893
  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include <dirent.h>
  5. #include <fcntl.h>
  6. #include <pthread.h>
  7. #include <sys/mman.h>
  8. #include <sys/resource.h>
  9. #include <sys/stat.h>
  10. #include <sys/time.h>
  11. #include <sys/types.h>
  12. #include <unistd.h>
  13. #include <atomic>
  14. #include <cerrno>
  15. #include <cstddef>
  16. #include <cstdint>
  17. #include <cstdio>
  18. #include <cstdlib>
  19. #include <cstring>
  20. #include <limits>
  21. #include <queue>
  22. #include <set>
  23. #include <string>
  24. #include <thread>
  25. #include <type_traits>
  26. #include <utility>
  27. #include "leveldb/env.h"
  28. #include "leveldb/slice.h"
  29. #include "leveldb/status.h"
  30. #include "port/port.h"
  31. #include "port/thread_annotations.h"
  32. #include "util/env_posix_test_helper.h"
  33. #include "util/posix_logger.h"
  34. namespace leveldb {
  35. namespace {
  // Maximum number of read-only files kept open at once. A negative value
  // means "not decided yet"; it is assigned either by
  // EnvPosixTestHelper::SetReadOnlyFDLimit() or lazily by MaxOpenFiles().
  int g_open_read_only_file_limit = -1;

  // Default mmap budget: up to 1000 mmap regions for 64-bit binaries; none for
  // 32-bit.
  constexpr int kDefaultMmapLimit = (sizeof(void*) >= 8) ? 1000 : 0;

  // Current mmap budget. Can be overridden using
  // EnvPosixTestHelper::SetReadOnlyMMapLimit().
  int g_mmap_limit = kDefaultMmapLimit;

  // Flags OR-ed into every posix open() call made in this file.
  #ifdef HAVE_O_CLOEXEC
  constexpr int kOpenBaseFlags = O_CLOEXEC;
  #else
  constexpr int kOpenBaseFlags = 0;
  #endif  // HAVE_O_CLOEXEC

  // Capacity, in bytes, of the in-memory buffer used by PosixWritableFile.
  constexpr size_t kWritableFileBufferSize = 65536;
  49. Status PosixError(const std::string& context, int error_number) {
  50. if (error_number == ENOENT) {
  51. return Status::NotFound(context, std::strerror(error_number));
  52. } else {
  53. return Status::IOError(context, std::strerror(error_number));
  54. }
  55. }
  // Helper class to limit resource usage to avoid exhaustion.
  //
  // Currently used to limit read-only file descriptors and mmap file usage
  // so that we do not run out of file descriptors or virtual memory, or run into
  // kernel performance problems for very large databases.
  class Limiter {
   public:
    // Limit maximum number of resources to |max_acquires|.
    explicit Limiter(int max_acquires) : acquires_allowed_(max_acquires) {}

    Limiter(const Limiter&) = delete;
    Limiter& operator=(const Limiter&) = delete;

    // If another resource is available, acquire it and return true.
    // Else return false.
    bool Acquire() {
      // Optimistically take a slot; fetch_sub() returns the value held before
      // the decrement.
      const int previously_available =
          acquires_allowed_.fetch_sub(1, std::memory_order_relaxed);
      if (previously_available > 0) return true;

      // No slot was available: undo the decrement so the counter is unchanged.
      acquires_allowed_.fetch_add(1, std::memory_order_relaxed);
      return false;
    }

    // Release a resource acquired by a previous call to Acquire() that returned
    // true.
    void Release() { acquires_allowed_.fetch_add(1, std::memory_order_relaxed); }

   private:
    // The number of available resources.
    //
    // This is a counter and is not tied to the invariants of any other class, so
    // it can be operated on safely using std::memory_order_relaxed.
    std::atomic<int> acquires_allowed_;
  };
  85. // Implements sequential read access in a file using read().
  86. //
  87. // Instances of this class are thread-friendly but not thread-safe, as required
  88. // by the SequentialFile API.
  89. class PosixSequentialFile final : public SequentialFile {
  90. public:
  91. PosixSequentialFile(std::string filename, int fd)
  92. : fd_(fd), filename_(filename) {}
  93. ~PosixSequentialFile() override { close(fd_); }
  94. Status Read(size_t n, Slice* result, char* scratch) override {
  95. Status status;
  96. while (true) {
  97. ::ssize_t read_size = ::read(fd_, scratch, n);
  98. if (read_size < 0) { // Read error.
  99. if (errno == EINTR) {
  100. continue; // Retry
  101. }
  102. status = PosixError(filename_, errno);
  103. break;
  104. }
  105. *result = Slice(scratch, read_size);
  106. break;
  107. }
  108. return status;
  109. }
  110. Status Skip(uint64_t n) override {
  111. if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) {
  112. return PosixError(filename_, errno);
  113. }
  114. return Status::OK();
  115. }
  116. private:
  117. const int fd_;
  118. const std::string filename_;
  119. };
  120. // Implements random read access in a file using pread().
  121. //
  122. // Instances of this class are thread-safe, as required by the RandomAccessFile
  123. // API. Instances are immutable and Read() only calls thread-safe library
  124. // functions.
  125. class PosixRandomAccessFile final : public RandomAccessFile {
  126. public:
  127. // The new instance takes ownership of |fd|. |fd_limiter| must outlive this
  128. // instance, and will be used to determine if .
  129. PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter)
  130. : has_permanent_fd_(fd_limiter->Acquire()),
  131. fd_(has_permanent_fd_ ? fd : -1),
  132. fd_limiter_(fd_limiter),
  133. filename_(std::move(filename)) {
  134. if (!has_permanent_fd_) {
  135. assert(fd_ == -1);
  136. ::close(fd); // The file will be opened on every read.
  137. }
  138. }
  139. ~PosixRandomAccessFile() override {
  140. if (has_permanent_fd_) {
  141. assert(fd_ != -1);
  142. ::close(fd_);
  143. fd_limiter_->Release();
  144. }
  145. }
  146. Status Read(uint64_t offset, size_t n, Slice* result,
  147. char* scratch) const override {
  148. int fd = fd_;
  149. if (!has_permanent_fd_) {
  150. fd = ::open(filename_.c_str(), O_RDONLY | kOpenBaseFlags);
  151. if (fd < 0) {
  152. return PosixError(filename_, errno);
  153. }
  154. }
  155. assert(fd != -1);
  156. Status status;
  157. ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset));
  158. *result = Slice(scratch, (read_size < 0) ? 0 : read_size);
  159. if (read_size < 0) {
  160. // An error: return a non-ok status.
  161. status = PosixError(filename_, errno);
  162. }
  163. if (!has_permanent_fd_) {
  164. // Close the temporary file descriptor opened earlier.
  165. assert(fd != fd_);
  166. ::close(fd);
  167. }
  168. return status;
  169. }
  170. private:
  171. const bool has_permanent_fd_; // If false, the file is opened on every read.
  172. const int fd_; // -1 if has_permanent_fd_ is false.
  173. Limiter* const fd_limiter_;
  174. const std::string filename_;
  175. };
  176. // Implements random read access in a file using mmap().
  177. //
  178. // Instances of this class are thread-safe, as required by the RandomAccessFile
  179. // API. Instances are immutable and Read() only calls thread-safe library
  180. // functions.
  181. class PosixMmapReadableFile final : public RandomAccessFile {
  182. public:
  183. // mmap_base[0, length-1] points to the memory-mapped contents of the file. It
  184. // must be the result of a successful call to mmap(). This instances takes
  185. // over the ownership of the region.
  186. //
  187. // |mmap_limiter| must outlive this instance. The caller must have already
  188. // aquired the right to use one mmap region, which will be released when this
  189. // instance is destroyed.
  190. PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length,
  191. Limiter* mmap_limiter)
  192. : mmap_base_(mmap_base),
  193. length_(length),
  194. mmap_limiter_(mmap_limiter),
  195. filename_(std::move(filename)) {}
  196. ~PosixMmapReadableFile() override {
  197. ::munmap(static_cast<void*>(mmap_base_), length_);
  198. mmap_limiter_->Release();
  199. }
  200. Status Read(uint64_t offset, size_t n, Slice* result,
  201. char* scratch) const override {
  202. if (offset + n > length_) {
  203. *result = Slice();
  204. return PosixError(filename_, EINVAL);
  205. }
  206. *result = Slice(mmap_base_ + offset, n);
  207. return Status::OK();
  208. }
  209. private:
  210. char* const mmap_base_;
  211. const size_t length_;
  212. Limiter* const mmap_limiter_;
  213. const std::string filename_;
  214. };
  215. class PosixWritableFile final : public WritableFile {
  216. public:
  217. PosixWritableFile(std::string filename, int fd)
  218. : pos_(0),
  219. fd_(fd),
  220. is_manifest_(IsManifest(filename)),
  221. filename_(std::move(filename)),
  222. dirname_(Dirname(filename_)) {}
  223. ~PosixWritableFile() override {
  224. if (fd_ >= 0) {
  225. // Ignoring any potential errors
  226. Close();
  227. }
  228. }
  229. Status Append(const Slice& data) override {
  230. size_t write_size = data.size();
  231. const char* write_data = data.data();
  232. // Fit as much as possible into buffer.
  233. size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_);
  234. std::memcpy(buf_ + pos_, write_data, copy_size);
  235. write_data += copy_size;
  236. write_size -= copy_size;
  237. pos_ += copy_size;
  238. if (write_size == 0) {
  239. return Status::OK();
  240. }
  241. // Can't fit in buffer, so need to do at least one write.
  242. Status status = FlushBuffer();
  243. if (!status.ok()) {
  244. return status;
  245. }
  246. // Small writes go to buffer, large writes are written directly.
  247. if (write_size < kWritableFileBufferSize) {
  248. std::memcpy(buf_, write_data, write_size);
  249. pos_ = write_size;
  250. return Status::OK();
  251. }
  252. return WriteUnbuffered(write_data, write_size);
  253. }
  254. Status Close() override {
  255. Status status = FlushBuffer();
  256. const int close_result = ::close(fd_);
  257. if (close_result < 0 && status.ok()) {
  258. status = PosixError(filename_, errno);
  259. }
  260. fd_ = -1;
  261. return status;
  262. }
  263. Status Flush() override { return FlushBuffer(); }
  264. Status Sync() override {
  265. // Ensure new files referred to by the manifest are in the filesystem.
  266. //
  267. // This needs to happen before the manifest file is flushed to disk, to
  268. // avoid crashing in a state where the manifest refers to files that are not
  269. // yet on disk.
  270. Status status = SyncDirIfManifest();
  271. if (!status.ok()) {
  272. return status;
  273. }
  274. status = FlushBuffer();
  275. if (!status.ok()) {
  276. return status;
  277. }
  278. return SyncFd(fd_, filename_);
  279. }
  280. private:
  281. Status FlushBuffer() {
  282. Status status = WriteUnbuffered(buf_, pos_);
  283. pos_ = 0;
  284. return status;
  285. }
  286. Status WriteUnbuffered(const char* data, size_t size) {
  287. while (size > 0) {
  288. ssize_t write_result = ::write(fd_, data, size);
  289. if (write_result < 0) {
  290. if (errno == EINTR) {
  291. continue; // Retry
  292. }
  293. return PosixError(filename_, errno);
  294. }
  295. data += write_result;
  296. size -= write_result;
  297. }
  298. return Status::OK();
  299. }
  300. Status SyncDirIfManifest() {
  301. Status status;
  302. if (!is_manifest_) {
  303. return status;
  304. }
  305. int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags);
  306. if (fd < 0) {
  307. status = PosixError(dirname_, errno);
  308. } else {
  309. status = SyncFd(fd, dirname_);
  310. ::close(fd);
  311. }
  312. return status;
  313. }
  314. // Ensures that all the caches associated with the given file descriptor's
  315. // data are flushed all the way to durable media, and can withstand power
  316. // failures.
  317. //
  318. // The path argument is only used to populate the description string in the
  319. // returned Status if an error occurs.
  320. static Status SyncFd(int fd, const std::string& fd_path) {
  321. #if HAVE_FULLFSYNC
  322. // On macOS and iOS, fsync() doesn't guarantee durability past power
  323. // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some
  324. // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to
  325. // fsync().
  326. if (::fcntl(fd, F_FULLFSYNC) == 0) {
  327. return Status::OK();
  328. }
  329. #endif // HAVE_FULLFSYNC
  330. #if HAVE_FDATASYNC
  331. bool sync_success = ::fdatasync(fd) == 0;
  332. #else
  333. bool sync_success = ::fsync(fd) == 0;
  334. #endif // HAVE_FDATASYNC
  335. if (sync_success) {
  336. return Status::OK();
  337. }
  338. return PosixError(fd_path, errno);
  339. }
  340. // Returns the directory name in a path pointing to a file.
  341. //
  342. // Returns "." if the path does not contain any directory separator.
  343. static std::string Dirname(const std::string& filename) {
  344. std::string::size_type separator_pos = filename.rfind('/');
  345. if (separator_pos == std::string::npos) {
  346. return std::string(".");
  347. }
  348. // The filename component should not contain a path separator. If it does,
  349. // the splitting was done incorrectly.
  350. assert(filename.find('/', separator_pos + 1) == std::string::npos);
  351. return filename.substr(0, separator_pos);
  352. }
  353. // Extracts the file name from a path pointing to a file.
  354. //
  355. // The returned Slice points to |filename|'s data buffer, so it is only valid
  356. // while |filename| is alive and unchanged.
  357. static Slice Basename(const std::string& filename) {
  358. std::string::size_type separator_pos = filename.rfind('/');
  359. if (separator_pos == std::string::npos) {
  360. return Slice(filename);
  361. }
  362. // The filename component should not contain a path separator. If it does,
  363. // the splitting was done incorrectly.
  364. assert(filename.find('/', separator_pos + 1) == std::string::npos);
  365. return Slice(filename.data() + separator_pos + 1,
  366. filename.length() - separator_pos - 1);
  367. }
  368. // True if the given file is a manifest file.
  369. static bool IsManifest(const std::string& filename) {
  370. return Basename(filename).starts_with("MANIFEST");
  371. }
  372. // buf_[0, pos_ - 1] contains data to be written to fd_.
  373. char buf_[kWritableFileBufferSize];
  374. size_t pos_;
  375. int fd_;
  376. const bool is_manifest_; // True if the file's name starts with MANIFEST.
  377. const std::string filename_;
  378. const std::string dirname_; // The directory of filename_.
  379. };
  // Places (|lock| == true) or removes (|lock| == false) a write lock covering
  // the whole file referred to by |fd|, using the non-blocking fcntl(F_SETLK)
  // request.
  //
  // Returns the result of fcntl(). errno is cleared before the call, so on
  // return it reflects only this request.
  int LockOrUnlock(int fd, bool lock) {
    errno = 0;

    struct ::flock request;
    std::memset(&request, 0, sizeof(request));
    if (lock) {
      request.l_type = F_WRLCK;
    } else {
      request.l_type = F_UNLCK;
    }
    // Offset 0 from the start of the file with length 0 means "the entire
    // file".
    request.l_whence = SEEK_SET;
    request.l_start = 0;
    request.l_len = 0;

    return ::fcntl(fd, F_SETLK, &request);
  }
  390. // Instances are thread-safe because they are immutable.
  391. class PosixFileLock : public FileLock {
  392. public:
  393. PosixFileLock(int fd, std::string filename)
  394. : fd_(fd), filename_(std::move(filename)) {}
  395. int fd() const { return fd_; }
  396. const std::string& filename() const { return filename_; }
  397. private:
  398. const int fd_;
  399. const std::string filename_;
  400. };
  401. // Tracks the files locked by PosixEnv::LockFile().
  402. //
  403. // We maintain a separate set instead of relying on fcntl(F_SETLK) because
  404. // fcntl(F_SETLK) does not provide any protection against multiple uses from the
  405. // same process.
  406. //
  407. // Instances are thread-safe because all member data is guarded by a mutex.
  408. class PosixLockTable {
  409. public:
  410. bool Insert(const std::string& fname) LOCKS_EXCLUDED(mu_) {
  411. mu_.Lock();
  412. bool succeeded = locked_files_.insert(fname).second;
  413. mu_.Unlock();
  414. return succeeded;
  415. }
  416. void Remove(const std::string& fname) LOCKS_EXCLUDED(mu_) {
  417. mu_.Lock();
  418. locked_files_.erase(fname);
  419. mu_.Unlock();
  420. }
  421. private:
  422. port::Mutex mu_;
  423. std::set<std::string> locked_files_ GUARDED_BY(mu_);
  424. };
  425. class PosixEnv : public Env {
  426. public:
  427. PosixEnv();
  428. ~PosixEnv() override {
  429. static const char msg[] =
  430. "PosixEnv singleton destroyed. Unsupported behavior!\n";
  431. std::fwrite(msg, 1, sizeof(msg), stderr);
  432. std::abort();
  433. }
  434. Status NewSequentialFile(const std::string& filename,
  435. SequentialFile** result) override {
  436. int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
  437. if (fd < 0) {
  438. *result = nullptr;
  439. return PosixError(filename, errno);
  440. }
  441. *result = new PosixSequentialFile(filename, fd);
  442. return Status::OK();
  443. }
  444. Status NewRandomAccessFile(const std::string& filename,
  445. RandomAccessFile** result) override {
  446. *result = nullptr;
  447. int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
  448. if (fd < 0) {
  449. return PosixError(filename, errno);
  450. }
  451. if (!mmap_limiter_.Acquire()) {
  452. *result = new PosixRandomAccessFile(filename, fd, &fd_limiter_);
  453. return Status::OK();
  454. }
  455. uint64_t file_size;
  456. Status status = GetFileSize(filename, &file_size);
  457. if (status.ok()) {
  458. void* mmap_base =
  459. ::mmap(/*addr=*/nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
  460. if (mmap_base != MAP_FAILED) {
  461. *result = new PosixMmapReadableFile(filename,
  462. reinterpret_cast<char*>(mmap_base),
  463. file_size, &mmap_limiter_);
  464. } else {
  465. status = PosixError(filename, errno);
  466. }
  467. }
  468. ::close(fd);
  469. if (!status.ok()) {
  470. mmap_limiter_.Release();
  471. }
  472. return status;
  473. }
  474. Status NewWritableFile(const std::string& filename,
  475. WritableFile** result) override {
  476. int fd = ::open(filename.c_str(),
  477. O_TRUNC | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
  478. if (fd < 0) {
  479. *result = nullptr;
  480. return PosixError(filename, errno);
  481. }
  482. *result = new PosixWritableFile(filename, fd);
  483. return Status::OK();
  484. }
  485. Status NewAppendableFile(const std::string& filename,
  486. WritableFile** result) override {
  487. int fd = ::open(filename.c_str(),
  488. O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
  489. if (fd < 0) {
  490. *result = nullptr;
  491. return PosixError(filename, errno);
  492. }
  493. *result = new PosixWritableFile(filename, fd);
  494. return Status::OK();
  495. }
  496. bool FileExists(const std::string& filename) override {
  497. return ::access(filename.c_str(), F_OK) == 0;
  498. }
  499. Status GetChildren(const std::string& directory_path,
  500. std::vector<std::string>* result) override {
  501. result->clear();
  502. ::DIR* dir = ::opendir(directory_path.c_str());
  503. if (dir == nullptr) {
  504. return PosixError(directory_path, errno);
  505. }
  506. struct ::dirent* entry;
  507. while ((entry = ::readdir(dir)) != nullptr) {
  508. result->emplace_back(entry->d_name);
  509. }
  510. ::closedir(dir);
  511. return Status::OK();
  512. }
  513. Status RemoveFile(const std::string& filename) override {
  514. if (::unlink(filename.c_str()) != 0) {
  515. return PosixError(filename, errno);
  516. }
  517. return Status::OK();
  518. }
  519. Status CreateDir(const std::string& dirname) override {
  520. if (::mkdir(dirname.c_str(), 0755) != 0) {
  521. return PosixError(dirname, errno);
  522. }
  523. return Status::OK();
  524. }
  525. Status RemoveDir(const std::string& dirname) override {
  526. if (::rmdir(dirname.c_str()) != 0) {
  527. return PosixError(dirname, errno);
  528. }
  529. return Status::OK();
  530. }
  531. Status GetFileSize(const std::string& filename, uint64_t* size) override {
  532. struct ::stat file_stat;
  533. if (::stat(filename.c_str(), &file_stat) != 0) {
  534. *size = 0;
  535. return PosixError(filename, errno);
  536. }
  537. *size = file_stat.st_size;
  538. return Status::OK();
  539. }
  540. Status RenameFile(const std::string& from, const std::string& to) override {
  541. if (std::rename(from.c_str(), to.c_str()) != 0) {
  542. return PosixError(from, errno);
  543. }
  544. return Status::OK();
  545. }
  546. Status LockFile(const std::string& filename, FileLock** lock) override {
  547. *lock = nullptr;
  548. int fd = ::open(filename.c_str(), O_RDWR | O_CREAT | kOpenBaseFlags, 0644);
  549. if (fd < 0) {
  550. return PosixError(filename, errno);
  551. }
  552. if (!locks_.Insert(filename)) {
  553. ::close(fd);
  554. return Status::IOError("lock " + filename, "already held by process");
  555. }
  556. if (LockOrUnlock(fd, true) == -1) {
  557. int lock_errno = errno;
  558. ::close(fd);
  559. locks_.Remove(filename);
  560. return PosixError("lock " + filename, lock_errno);
  561. }
  562. *lock = new PosixFileLock(fd, filename);
  563. return Status::OK();
  564. }
  565. Status UnlockFile(FileLock* lock) override {
  566. PosixFileLock* posix_file_lock = static_cast<PosixFileLock*>(lock);
  567. if (LockOrUnlock(posix_file_lock->fd(), false) == -1) {
  568. return PosixError("unlock " + posix_file_lock->filename(), errno);
  569. }
  570. locks_.Remove(posix_file_lock->filename());
  571. ::close(posix_file_lock->fd());
  572. delete posix_file_lock;
  573. return Status::OK();
  574. }
  575. void Schedule(void (*background_work_function)(void* background_work_arg),
  576. void* background_work_arg) override;
  577. void StartThread(void (*thread_main)(void* thread_main_arg),
  578. void* thread_main_arg) override {
  579. std::thread new_thread(thread_main, thread_main_arg);
  580. new_thread.detach();
  581. }
  582. Status GetTestDirectory(std::string* result) override {
  583. const char* env = std::getenv("TEST_TMPDIR");
  584. if (env && env[0] != '\0') {
  585. *result = env;
  586. } else {
  587. char buf[100];
  588. std::snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d",
  589. static_cast<int>(::geteuid()));
  590. *result = buf;
  591. }
  592. // The CreateDir status is ignored because the directory may already exist.
  593. CreateDir(*result);
  594. return Status::OK();
  595. }
  596. Status NewLogger(const std::string& filename, Logger** result) override {
  597. int fd = ::open(filename.c_str(),
  598. O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
  599. if (fd < 0) {
  600. *result = nullptr;
  601. return PosixError(filename, errno);
  602. }
  603. std::FILE* fp = ::fdopen(fd, "w");
  604. if (fp == nullptr) {
  605. ::close(fd);
  606. *result = nullptr;
  607. return PosixError(filename, errno);
  608. } else {
  609. *result = new PosixLogger(fp);
  610. return Status::OK();
  611. }
  612. }
  613. uint64_t NowMicros() override {
  614. static constexpr uint64_t kUsecondsPerSecond = 1000000;
  615. struct ::timeval tv;
  616. ::gettimeofday(&tv, nullptr);
  617. return static_cast<uint64_t>(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec;
  618. }
  619. void SleepForMicroseconds(int micros) override {
  620. std::this_thread::sleep_for(std::chrono::microseconds(micros));
  621. }
  622. private:
  623. void BackgroundThreadMain();
  624. static void BackgroundThreadEntryPoint(PosixEnv* env) {
  625. env->BackgroundThreadMain();
  626. }
  627. // Stores the work item data in a Schedule() call.
  628. //
  629. // Instances are constructed on the thread calling Schedule() and used on the
  630. // background thread.
  631. //
  632. // This structure is thread-safe beacuse it is immutable.
  633. struct BackgroundWorkItem {
  634. explicit BackgroundWorkItem(void (*function)(void* arg), void* arg)
  635. : function(function), arg(arg) {}
  636. void (*const function)(void*);
  637. void* const arg;
  638. };
  639. port::Mutex background_work_mutex_;
  640. port::CondVar background_work_cv_ GUARDED_BY(background_work_mutex_);
  641. bool started_background_thread_ GUARDED_BY(background_work_mutex_);
  642. std::queue<BackgroundWorkItem> background_work_queue_
  643. GUARDED_BY(background_work_mutex_);
  644. PosixLockTable locks_; // Thread-safe.
  645. Limiter mmap_limiter_; // Thread-safe.
  646. Limiter fd_limiter_; // Thread-safe.
  647. };
  648. // Return the maximum number of concurrent mmaps.
  649. int MaxMmaps() { return g_mmap_limit; }
  650. // Return the maximum number of read-only files to keep open.
  651. int MaxOpenFiles() {
  652. if (g_open_read_only_file_limit >= 0) {
  653. return g_open_read_only_file_limit;
  654. }
  655. struct ::rlimit rlim;
  656. if (::getrlimit(RLIMIT_NOFILE, &rlim)) {
  657. // getrlimit failed, fallback to hard-coded default.
  658. g_open_read_only_file_limit = 50;
  659. } else if (rlim.rlim_cur == RLIM_INFINITY) {
  660. g_open_read_only_file_limit = std::numeric_limits<int>::max();
  661. } else {
  662. // Allow use of 20% of available file descriptors for read-only files.
  663. g_open_read_only_file_limit = (uint32_t)rlim.rlim_cur / 5;
  664. }
  665. return g_open_read_only_file_limit;
  666. }
  667. } // namespace
  668. PosixEnv::PosixEnv()
  669. : background_work_cv_(&background_work_mutex_),
  670. started_background_thread_(false),
  671. mmap_limiter_(MaxMmaps()),
  672. fd_limiter_(MaxOpenFiles()) {}
  673. void PosixEnv::Schedule(
  674. void (*background_work_function)(void* background_work_arg),
  675. void* background_work_arg) {
  676. background_work_mutex_.Lock();
  677. // Start the background thread, if we haven't done so already.
  678. if (!started_background_thread_) {
  679. started_background_thread_ = true;
  680. std::thread background_thread(PosixEnv::BackgroundThreadEntryPoint, this);
  681. background_thread.detach();
  682. }
  683. // If the queue is empty, the background thread may be waiting for work.
  684. if (background_work_queue_.empty()) {
  685. background_work_cv_.Signal();
  686. }
  687. background_work_queue_.emplace(background_work_function, background_work_arg);
  688. background_work_mutex_.Unlock();
  689. }
  690. void PosixEnv::BackgroundThreadMain() {
  691. while (true) {
  692. background_work_mutex_.Lock();
  693. // Wait until there is work to be done.
  694. while (background_work_queue_.empty()) {
  695. background_work_cv_.Wait();
  696. }
  697. assert(!background_work_queue_.empty());
  698. auto background_work_function = background_work_queue_.front().function;
  699. void* background_work_arg = background_work_queue_.front().arg;
  700. background_work_queue_.pop();
  701. background_work_mutex_.Unlock();
  702. background_work_function(background_work_arg);
  703. }
  704. }
  705. namespace {
  706. // Wraps an Env instance whose destructor is never created.
  707. //
  708. // Intended usage:
  709. // using PlatformSingletonEnv = SingletonEnv<PlatformEnv>;
  710. // void ConfigurePosixEnv(int param) {
  711. // PlatformSingletonEnv::AssertEnvNotInitialized();
  712. // // set global configuration flags.
  713. // }
  714. // Env* Env::Default() {
  715. // static PlatformSingletonEnv default_env;
  716. // return default_env.env();
  717. // }
  718. template <typename EnvType>
  719. class SingletonEnv {
  720. public:
  721. SingletonEnv() {
  722. #if !defined(NDEBUG)
  723. env_initialized_.store(true, std::memory_order::memory_order_relaxed);
  724. #endif // !defined(NDEBUG)
  725. static_assert(sizeof(env_storage_) >= sizeof(EnvType),
  726. "env_storage_ will not fit the Env");
  727. static_assert(alignof(decltype(env_storage_)) >= alignof(EnvType),
  728. "env_storage_ does not meet the Env's alignment needs");
  729. new (&env_storage_) EnvType();
  730. }
  731. ~SingletonEnv() = default;
  732. SingletonEnv(const SingletonEnv&) = delete;
  733. SingletonEnv& operator=(const SingletonEnv&) = delete;
  734. Env* env() { return reinterpret_cast<Env*>(&env_storage_); }
  735. static void AssertEnvNotInitialized() {
  736. #if !defined(NDEBUG)
  737. assert(!env_initialized_.load(std::memory_order::memory_order_relaxed));
  738. #endif // !defined(NDEBUG)
  739. }
  740. private:
  741. typename std::aligned_storage<sizeof(EnvType), alignof(EnvType)>::type
  742. env_storage_;
  743. #if !defined(NDEBUG)
  744. static std::atomic<bool> env_initialized_;
  745. #endif // !defined(NDEBUG)
  746. };
  747. #if !defined(NDEBUG)
  748. template <typename EnvType>
  749. std::atomic<bool> SingletonEnv<EnvType>::env_initialized_;
  750. #endif // !defined(NDEBUG)
  751. using PosixDefaultEnv = SingletonEnv<PosixEnv>;
  752. } // namespace
  753. void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) {
  754. PosixDefaultEnv::AssertEnvNotInitialized();
  755. g_open_read_only_file_limit = limit;
  756. }
  757. void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) {
  758. PosixDefaultEnv::AssertEnvNotInitialized();
  759. g_mmap_limit = limit;
  760. }
  761. Env* Env::Default() {
  762. static PosixDefaultEnv env_container;
  763. return env_container.env();
  764. }
  765. } // namespace leveldb