diff --git a/storage/rocksdb/rocksdb/USERS.md b/storage/rocksdb/rocksdb/USERS.md new file mode 100644 index 0000000000000000000000000000000000000000..339d2e90033044bd0706ced30184f18f3162fc29 --- /dev/null +++ b/storage/rocksdb/rocksdb/USERS.md @@ -0,0 +1,114 @@ +This document lists users of RocksDB and their use cases. If you are using RocksDB, please open a pull request and add yourself to the list. + +## Facebook +At Facebook, we use RocksDB as a storage engine in multiple data management services and as a backend for many different stateful services, including: + +1. MyRocks -- https://github.com/MySQLOnRocksDB/mysql-5.6 +2. MongoRocks -- https://github.com/mongodb-partners/mongo-rocks +3. ZippyDB -- Facebook's distributed key-value store with Paxos-style replication, built on top of RocksDB.[1] https://www.youtube.com/watch?v=DfiN7pG0D0k +4. Laser -- Laser is a high query throughput, low (millisecond) latency, key-value storage service built on top of RocksDB.[1] +5. Dragon -- a distributed graph query engine. https://code.facebook.com/posts/1737605303120405/dragon-a-distributed-graph-query-engine/ +6. Stylus -- a low-level stream processing framework written in C++.[1] +7. LogDevice -- a distributed data store for logs [2] + +[1] https://research.facebook.com/publications/realtime-data-processing-at-facebook/ + +[2] https://code.facebook.com/posts/357056558062811/logdevice-a-distributed-data-store-for-logs/ + +## LinkedIn +Two different use cases at LinkedIn are using RocksDB as a storage engine: + +1. LinkedIn's follow feed for storing users' activities. Check out the blog post: https://engineering.linkedin.com/blog/2016/03/followfeed--linkedin-s-feed-made-faster-and-smarter +2. 
Apache Samza, an open source framework for stream processing + +Learn more about those use cases in a Tech Talk by Ankit Gupta and Naveen Somasundaram: http://www.youtube.com/watch?v=plqVp_OnSzg + +## Yahoo +Yahoo is using RocksDB as a storage engine for their biggest distributed data store Sherpa. Learn more about it here: http://yahooeng.tumblr.com/post/120730204806/sherpa-scales-new-heights + +## CockroachDB +CockroachDB is an open-source geo-replicated transactional database. They are using RocksDB as their storage engine. Check out their GitHub: https://github.com/cockroachdb/cockroach + +## DNANexus +DNANexus is using RocksDB to speed up processing of genomics data. +You can learn more from this great blog post by Mike Lin: http://devblog.dnanexus.com/faster-bam-sorting-with-samtools-and-rocksdb/ + +## Iron.io +Iron.io is using RocksDB as a storage engine for their distributed queueing system. +Learn more from the Tech Talk by Reed Allman: http://www.youtube.com/watch?v=HTjt6oj-RL4 + +## Tango Me +Tango is using RocksDB as a graph storage to store all users' connection data and other social activity data. + +## Turn +Turn is using RocksDB as a storage layer for their key/value store, serving at peak 2.4MM QPS out of different datacenters. +Check out our RocksDB Protobuf merge operator at: https://github.com/vladb38/rocksdb_protobuf + +## Santander UK/Cloudera Professional Services +Check out their blog post: http://blog.cloudera.com/blog/2015/08/inside-santanders-near-real-time-data-ingest-architecture/ + +## Airbnb +Airbnb is using RocksDB as a storage engine for their personalized search service. You can learn more about it here: https://www.youtube.com/watch?v=ASQ6XMtogMs + +## Alluxio +[Alluxio](https://www.alluxio.io) uses RocksDB to serve and scale file system metadata to beyond 1 billion files. 
The detailed design and implementation are described in this engineering blog: +https://www.alluxio.io/blog/scalable-metadata-service-in-alluxio-storing-billions-of-files/ + +## Pinterest +Pinterest's Object Retrieval System uses RocksDB for storage: https://www.youtube.com/watch?v=MtFEVEs_2Vo + +## Smyte +[Smyte](https://www.smyte.com/) uses RocksDB as the storage layer for their core key-value storage, high-performance counters and time-windowed HyperLogLog services. + +## Rakuten Marketing +[Rakuten Marketing](https://marketing.rakuten.com/) uses RocksDB as the disk cache layer for the real-time bidding service in their Performance DSP. + +## VWO, Wingify +[VWO's](https://vwo.com/) Smart Code checker and URL helper use RocksDB to store all the URLs where VWO's Smart Code is installed. + +## quasardb +[quasardb](https://www.quasardb.net) is a high-performance, distributed, transactional key-value database that integrates well with in-memory analytics engines such as Apache Spark. +quasardb uses a heavily tuned RocksDB as its persistence layer. + +## Netflix +[Netflix](http://techblog.netflix.com/2016/05/application-data-caching-using-ssds.html) uses RocksDB on AWS EC2 instances with local SSD drives to cache application data. + +## TiKV +[TiKV](https://github.com/pingcap/tikv) is a geo-replicated, high-performance, distributed, transactional key-value database. TiKV is powered by Rust and Raft. TiKV uses RocksDB as its persistence layer. + +## Apache Flink +[Apache Flink](https://flink.apache.org/news/2016/03/08/release-1.0.0.html) uses RocksDB to store state locally on a machine. + +## Dgraph +[Dgraph](https://github.com/dgraph-io/dgraph) is an open-source, scalable, distributed, low latency, high throughput graph database. They use RocksDB to store state locally on a machine. + +## Uber +[Uber](http://eng.uber.com/cherami/) uses RocksDB as a durable and scalable task queue. 
+ +## 360 Pika +[360](http://www.360.cn/) [Pika](https://github.com/Qihoo360/pika) is a NoSQL database compatible with Redis. With the huge amount of data stored, Redis may suffer from a capacity bottleneck, and Pika was born to solve it. It has been widely used in many companies. + +## LzLabs +LzLabs is using RocksDB as a storage engine in their multi-database distributed framework to store application configuration and user data. + +## ProfaneDB +[ProfaneDB](https://profanedb.gitlab.io/) is a database for Protocol Buffers, and uses RocksDB for storage. It is accessible via gRPC, and the schema is defined directly using `.proto` files. + +## IOTA Foundation + [IOTA Foundation](https://www.iota.org/) is using RocksDB in the [IOTA Reference Implementation (IRI)](https://github.com/iotaledger/iri) to store the local state of the Tangle. The Tangle is the first open-source distributed ledger powering the future of the Internet of Things. + +## Avrio Project + [Avrio Project](http://avrio-project.github.io/avrio.network/) is using RocksDB in [Avrio](https://github.com/avrio-project/avrio) to store blocks, account balances and data and other blockchain-related data. Avrio is a multiblockchain decentralized cryptocurrency empowering monetary transactions. + +## Crux +[Crux](https://github.com/juxt/crux) is a document database that uses RocksDB for local [EAV](https://en.wikipedia.org/wiki/Entity%E2%80%93attribute%E2%80%93value_model) index storage to enable point-in-time bitemporal Datalog queries. The "unbundled" architecture uses Kafka to provide horizontal scalability. + +## Nebula Graph +[Nebula Graph](https://github.com/vesoft-inc/nebula) is a distributed, scalable, lightning-fast, open source graph database capable of hosting super large scale graphs with dozens of billions of vertices (nodes) and trillions of edges, with milliseconds of latency. 
+ +## YugabyteDB +[YugabyteDB](https://www.yugabyte.com/) is an open source, high performance, distributed SQL database that uses RocksDB as its storage layer. For more information, please see https://github.com/yugabyte/yugabyte-db/. + +## ArangoDB +[ArangoDB](https://www.arangodb.com/) is a native multi-model database with flexible data models for documents, graphs, and key-values, for building high performance applications using a convenient SQL-like query language or JavaScript extensions. It uses RocksDB as its storage engine. + diff --git a/storage/rocksdb/rocksdb/thirdparty.inc b/storage/rocksdb/rocksdb/thirdparty.inc new file mode 100644 index 0000000000000000000000000000000000000000..25ecdab88c2884241602916e7f2b8c3dece29953 --- /dev/null +++ b/storage/rocksdb/rocksdb/thirdparty.inc @@ -0,0 +1,268 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Locations of the include files and libraries of all 3rd party libraries.
# Edit the definitions below to match your installation.
#
# TODO: Make this work with find_package and/or get rid of it
#
# The defaults assume that every library is located in its own directory under
# the path held by the THIRDPARTY_HOME environment variable (Unix style dir
# separators). Either point THIRDPARTY_HOME at your third party libraries home
# or change the paths below to reflect where the libraries actually reside.
#
# Every path-based library below follows the same pattern:
#   * <LIB>_HOME, <LIB>_INCLUDE, <LIB>_LIB_DEBUG and <LIB>_LIB_RELEASE hold the
#     default locations.
#   * The legacy switch <LIB> turns WITH_<LIB> on (kept for compatibility).
#   * When WITH_<LIB> is on, environment variables named <LIB>_INCLUDE,
#     <LIB>_LIB_DEBUG and <LIB>_LIB_RELEASE, if defined, replace the defaults.
#     The compile definitions and the include directory are then registered
#     and the debug/optimized library pair is appended to THIRDPARTY_LIBS.
#
set(THIRDPARTY_LIBS "") # Initialization, don't touch

# ================================================== GFLAGS ==================================================
#
# Edit these 4 lines to define paths to GFlags
#
set(GFLAGS_HOME $ENV{THIRDPARTY_HOME}/Gflags.Library)
set(GFLAGS_INCLUDE ${GFLAGS_HOME}/build/native/include)
set(GFLAGS_LIB_DEBUG ${GFLAGS_HOME}/lib/native/debug/amd64/gflags.lib)
set(GFLAGS_LIB_RELEASE ${GFLAGS_HOME}/lib/native/retail/amd64/gflags.lib)

# For compatibility: the legacy GFLAGS switch implies WITH_GFLAGS
if(GFLAGS)
  set(WITH_GFLAGS ON)
endif()

if(NOT WITH_GFLAGS)
  message(STATUS "GFLAGS library is disabled")
else()
  message(STATUS "GFLAGS library is enabled")

  # Environment variables of the same name take precedence over the defaults
  foreach(thirdparty_env_name GFLAGS_INCLUDE GFLAGS_LIB_DEBUG GFLAGS_LIB_RELEASE)
    if(DEFINED ENV{${thirdparty_env_name}})
      set(${thirdparty_env_name} $ENV{${thirdparty_env_name}})
    endif()
  endforeach()

  set(GFLAGS_LIBS debug ${GFLAGS_LIB_DEBUG} optimized ${GFLAGS_LIB_RELEASE})
  set(GFLAGS_CXX_FLAGS -DGFLAGS=gflags)

  include_directories(${GFLAGS_INCLUDE})
  add_definitions(${GFLAGS_CXX_FLAGS})
  list(APPEND THIRDPARTY_LIBS ${GFLAGS_LIBS})
endif()

# ================================================== SNAPPY ==================================================
#
# Edit these 4 lines to define paths to Snappy
#
set(SNAPPY_HOME $ENV{THIRDPARTY_HOME}/Snappy.Library)
set(SNAPPY_INCLUDE ${SNAPPY_HOME}/build/native/inc/inc)
set(SNAPPY_LIB_DEBUG ${SNAPPY_HOME}/lib/native/debug/amd64/snappy.lib)
set(SNAPPY_LIB_RELEASE ${SNAPPY_HOME}/lib/native/retail/amd64/snappy.lib)

# For compatibility: the legacy SNAPPY switch implies WITH_SNAPPY
if(SNAPPY)
  set(WITH_SNAPPY ON)
endif()

if(NOT WITH_SNAPPY)
  message(STATUS "SNAPPY library is disabled")
else()
  message(STATUS "SNAPPY library is enabled")

  # Environment variables of the same name take precedence over the defaults
  foreach(thirdparty_env_name SNAPPY_INCLUDE SNAPPY_LIB_DEBUG SNAPPY_LIB_RELEASE)
    if(DEFINED ENV{${thirdparty_env_name}})
      set(${thirdparty_env_name} $ENV{${thirdparty_env_name}})
    endif()
  endforeach()

  set(SNAPPY_LIBS debug ${SNAPPY_LIB_DEBUG} optimized ${SNAPPY_LIB_RELEASE})
  set(SNAPPY_CXX_FLAGS -DSNAPPY)

  include_directories(${SNAPPY_INCLUDE})
  add_definitions(${SNAPPY_CXX_FLAGS})
  list(APPEND THIRDPARTY_LIBS ${SNAPPY_LIBS})
endif()

# ================================================== LZ4 ==================================================
#
# Edit these 4 lines to define paths to LZ4
#
set(LZ4_HOME $ENV{THIRDPARTY_HOME}/LZ4.Library)
set(LZ4_INCLUDE ${LZ4_HOME}/build/native/inc/inc)
set(LZ4_LIB_DEBUG ${LZ4_HOME}/lib/native/debug/amd64/lz4.lib)
set(LZ4_LIB_RELEASE ${LZ4_HOME}/lib/native/retail/amd64/lz4.lib)

# For compatibility: the legacy LZ4 switch implies WITH_LZ4
if(LZ4)
  set(WITH_LZ4 ON)
endif()

if(NOT WITH_LZ4)
  message(STATUS "LZ4 library is disabled")
else()
  message(STATUS "LZ4 library is enabled")

  # Environment variables of the same name take precedence over the defaults
  foreach(thirdparty_env_name LZ4_INCLUDE LZ4_LIB_DEBUG LZ4_LIB_RELEASE)
    if(DEFINED ENV{${thirdparty_env_name}})
      set(${thirdparty_env_name} $ENV{${thirdparty_env_name}})
    endif()
  endforeach()

  set(LZ4_LIBS debug ${LZ4_LIB_DEBUG} optimized ${LZ4_LIB_RELEASE})
  set(LZ4_CXX_FLAGS -DLZ4)

  include_directories(${LZ4_INCLUDE})
  add_definitions(${LZ4_CXX_FLAGS})
  list(APPEND THIRDPARTY_LIBS ${LZ4_LIBS})
endif()

# ================================================== ZLIB ==================================================
#
# Edit these 4 lines to define paths to ZLIB
#
set(ZLIB_HOME $ENV{THIRDPARTY_HOME}/ZLIB.Library)
set(ZLIB_INCLUDE ${ZLIB_HOME}/build/native/inc/inc)
set(ZLIB_LIB_DEBUG ${ZLIB_HOME}/lib/native/debug/amd64/zlib.lib)
set(ZLIB_LIB_RELEASE ${ZLIB_HOME}/lib/native/retail/amd64/zlib.lib)

# For compatibility: the legacy ZLIB switch implies WITH_ZLIB
if(ZLIB)
  set(WITH_ZLIB ON)
endif()

if(NOT WITH_ZLIB)
  message(STATUS "ZLIB library is disabled")
else()
  message(STATUS "ZLIB library is enabled")

  # Environment variables of the same name take precedence over the defaults
  foreach(thirdparty_env_name ZLIB_INCLUDE ZLIB_LIB_DEBUG ZLIB_LIB_RELEASE)
    if(DEFINED ENV{${thirdparty_env_name}})
      set(${thirdparty_env_name} $ENV{${thirdparty_env_name}})
    endif()
  endforeach()

  set(ZLIB_LIBS debug ${ZLIB_LIB_DEBUG} optimized ${ZLIB_LIB_RELEASE})
  set(ZLIB_CXX_FLAGS -DZLIB)

  include_directories(${ZLIB_INCLUDE})
  add_definitions(${ZLIB_CXX_FLAGS})
  list(APPEND THIRDPARTY_LIBS ${ZLIB_LIBS})
endif()

# ================================================== XPRESS ==================================================
# This makes use of built-in Windows API, no additional includes, links to a system lib

# For compatibility: the legacy XPRESS switch implies WITH_XPRESS
if(XPRESS)
  set(WITH_XPRESS ON)
endif()

if(NOT WITH_XPRESS)
  message(STATUS "XPRESS is disabled")
else()
  message(STATUS "XPRESS is enabled")

  add_definitions(-DXPRESS)

  # We are using the implementation provided by the system
  set(SYSTEM_LIBS ${SYSTEM_LIBS} Cabinet.lib)
endif()

# ================================================== ZSTD ==================================================
#
# Edit these 4 lines to define paths to ZSTD
#
set(ZSTD_HOME $ENV{THIRDPARTY_HOME}/ZSTD.Library)
set(ZSTD_INCLUDE ${ZSTD_HOME}/build/native/inc)
set(ZSTD_LIB_DEBUG ${ZSTD_HOME}/lib/native/debug/amd64/libzstd_static.lib)
set(ZSTD_LIB_RELEASE ${ZSTD_HOME}/lib/native/retail/amd64/libzstd_static.lib)

# For compatibility: the legacy ZSTD switch implies WITH_ZSTD
if(ZSTD)
  set(WITH_ZSTD ON)
endif()

if(NOT WITH_ZSTD)
  message(STATUS "ZSTD library is disabled")
else()
  message(STATUS "ZSTD library is enabled")

  # Environment variables of the same name take precedence over the defaults
  foreach(thirdparty_env_name ZSTD_INCLUDE ZSTD_LIB_DEBUG ZSTD_LIB_RELEASE)
    if(DEFINED ENV{${thirdparty_env_name}})
      set(${thirdparty_env_name} $ENV{${thirdparty_env_name}})
    endif()
  endforeach()

  set(ZSTD_LIBS debug ${ZSTD_LIB_DEBUG} optimized ${ZSTD_LIB_RELEASE})

  # ZSTD_STATIC_LINKING_ONLY only allows us to create an allocation functions
  # override when jemalloc is in use
  add_definitions(-DZSTD -DZSTD_STATIC_LINKING_ONLY)
  include_directories(${ZSTD_INCLUDE})
  list(APPEND THIRDPARTY_LIBS ${ZSTD_LIBS})
endif()

# ================================================== JEMALLOC ==================================================
#
# Edit these 4 lines to define paths to Jemalloc
#
set(JEMALLOC_HOME $ENV{THIRDPARTY_HOME}/Jemalloc.Library)
set(JEMALLOC_INCLUDE ${JEMALLOC_HOME}/build/native/inc)
set(JEMALLOC_LIB_DEBUG ${JEMALLOC_HOME}/lib/native/debug/amd64/jemalloc.lib)
set(JEMALLOC_LIB_RELEASE ${JEMALLOC_HOME}/lib/native/retail/amd64/jemalloc.lib)

# For compatibility: the legacy JEMALLOC switch implies WITH_JEMALLOC
if(JEMALLOC)
  set(WITH_JEMALLOC ON)
endif()

# Besides the usual wiring, this section also sets ARTIFACT_SUFFIX:
# "_je" when jemalloc is enabled, empty otherwise.
if(NOT WITH_JEMALLOC)
  set(ARTIFACT_SUFFIX "")
  message(STATUS "JEMALLOC library is disabled")
else()
  message(STATUS "JEMALLOC library is enabled")
  set(JEMALLOC_CXX_FLAGS "-DROCKSDB_JEMALLOC -DJEMALLOC_EXPORT= -DJEMALLOC_NO_RENAME")

  # Environment variables of the same name take precedence over the defaults
  foreach(thirdparty_env_name JEMALLOC_INCLUDE JEMALLOC_LIB_DEBUG JEMALLOC_LIB_RELEASE)
    if(DEFINED ENV{${thirdparty_env_name}})
      set(${thirdparty_env_name} $ENV{${thirdparty_env_name}})
    endif()
  endforeach()

  set(JEMALLOC_LIBS debug ${JEMALLOC_LIB_DEBUG} optimized ${JEMALLOC_LIB_RELEASE})

  add_definitions(${JEMALLOC_CXX_FLAGS})
  include_directories(${JEMALLOC_INCLUDE})
  list(APPEND THIRDPARTY_LIBS ${JEMALLOC_LIBS})
  set(ARTIFACT_SUFFIX "_je")
endif()

# Do not leave the helper loop variable behind in the including scope
# (older foreach() policies keep it defined after the loop).
unset(thirdparty_env_name)