local-scylladb-cluster/scylla.yaml

# Scylla storage config YAML

#######################################
# This file is split to two sections:
# 1. Supported parameters
# 2. Unsupported parameters: reserved for future use or backwards
#    compatibility.
# Scylla will only read and use the first segment
#######################################

### Supported Parameters

# The name of the cluster. This is mainly used to prevent machines in
# one logical cluster from joining another.
# It is recommended to change the default value when creating a new cluster.
# You can NOT modify this value for an existing cluster
#cluster_name: 'Test Cluster'

# This defines the number of tokens randomly assigned to this node on the ring
# The more tokens, relative to other nodes, the larger the proportion of data
# that this node will store. You probably want all nodes to have the same number
# of tokens assuming they have equal hardware capability.
num_tokens: 256

# Directory where Scylla should store all its files, which are commitlog,
# data, hints, view_hints and saved_caches subdirectories. All of these
# subs can be overriden by the respective options below.
# If unset, the value defaults to /var/lib/scylla
# workdir: /var/lib/scylla

# Directory where Scylla should store data on disk.
# data_file_directories:
#    - /var/lib/scylla/data

# commit log.  when running on magnetic HDD, this should be a
# separate spindle than the data directories.
# commitlog_directory: /var/lib/scylla/commitlog

# commitlog_sync may be either "periodic" or "batch."
#
# When in batch mode, Scylla won't ack writes until the commit log
# has been fsynced to disk.  It will wait
# commitlog_sync_batch_window_in_ms milliseconds between fsyncs.
# This window should be kept short because the writer threads will
# be unable to do extra work while waiting.  (You may need to increase
# concurrent_writes for the same reason.)
#
# commitlog_sync: batch
# commitlog_sync_batch_window_in_ms: 2
#
# the other option is "periodic" where writes may be acked immediately
# and the CommitLog is simply synced every commitlog_sync_period_in_ms
# milliseconds.
commitlog_sync: periodic
commitlog_sync_period_in_ms: 10000

# The size of the individual commitlog file segments.  A commitlog
# segment may be archived, deleted, or recycled once all the data
# in it (potentially from each columnfamily in the system) has been
# flushed to sstables.
#
# The default size is 32, which is almost always fine, but if you are
# archiving commitlog segments (see commitlog_archiving.properties),
# then you probably want a finer granularity of archiving; 8 or 16 MB
# is reasonable.
commitlog_segment_size_in_mb: 32

# seed_provider class_name is saved for future use.
# A seed address is mandatory.
seed_provider:
    # The addresses of hosts that will serve as contact points for the joining node.
    # It allows the node to discover the cluster ring topology on startup (when
    # joining the cluster).
    # Once the node has joined the cluster, the seed list has no function.
    - class_name: org.apache.cassandra.locator.SimpleSeedProvider
      parameters:
          # In a new cluster, provide the address of the first node.
          # In an existing cluster, specify the address of at least one existing node.
          # If you specify addresses of more than one node, use a comma to separate them.
          # For example: "<IP1>,<IP2>,<IP3>"
          - seeds: "127.0.0.1"

# Address to bind to and tell other Scylla nodes to connect to.
# You _must_ change this if you want multiple nodes to be able to communicate!
#
# If you leave broadcast_address (below) empty, then setting listen_address
# to 0.0.0.0 is wrong as other nodes will not know how to reach this node.
# If you set broadcast_address, then you can set listen_address to 0.0.0.0.
listen_address: 0.0.0.0

# Address to broadcast to other Scylla nodes
# Leaving this blank will set it to the same value as listen_address
# broadcast_address: 1.2.3.4


# When using multiple physical network interfaces, set this to true to listen on broadcast_address
# in addition to the listen_address, allowing nodes to communicate in both interfaces.
# Ignore this property if the network configuration automatically routes between the public and private networks such as EC2.
#
# listen_on_broadcast_address: false

# port for the CQL native transport to listen for clients on
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
# To disable the CQL native transport, remove this option and configure native_transport_port_ssl.
native_transport_port: 9042

# Like native_transport_port, but clients are forwarded to specific shards, based on the
# client-side port numbers.
native_shard_aware_transport_port: 19042

# Enabling native transport encryption in client_encryption_options allows you to either use
# encryption for the standard port or to use a dedicated, additional port along with the unencrypted
# standard native_transport_port.
# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption
# for native_transport_port. Setting native_transport_port_ssl to a different value
# from native_transport_port will use encryption for native_transport_port_ssl while
# keeping native_transport_port unencrypted.
#native_transport_port_ssl: 9142

# Like native_transport_port_ssl, but clients are forwarded to specific shards, based on the
# client-side port numbers.
#native_shard_aware_transport_port_ssl: 19142

# How long the coordinator should wait for read operations to complete
read_request_timeout_in_ms: 5000

# How long the coordinator should wait for writes to complete
write_request_timeout_in_ms: 2000
# how long a coordinator should continue to retry a CAS operation
# that contends with other proposals for the same row
cas_contention_timeout_in_ms: 1000

# phi value that must be reached for a host to be marked down.
# most users should never need to adjust this.
# phi_convict_threshold: 8

# IEndpointSnitch.  The snitch has two functions:
# - it teaches Scylla enough about your network topology to route
#   requests efficiently
# - it allows Scylla to spread replicas around your cluster to avoid
#   correlated failures. It does this by grouping machines into
#   "datacenters" and "racks."  Scylla will do its best not to have
#   more than one replica on the same "rack" (which may not actually
#   be a physical location)
#
# IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER,
# YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS
# ARE PLACED.
#
# Out of the box, Scylla provides
#  - SimpleSnitch:
#    Treats Strategy order as proximity. This can improve cache
#    locality when disabling read repair.  Only appropriate for
#    single-datacenter deployments.
#  - GossipingPropertyFileSnitch
#    This should be your go-to snitch for production use.  The rack
#    and datacenter for the local node are defined in
#    cassandra-rackdc.properties and propagated to other nodes via
#    gossip.  If cassandra-topology.properties exists, it is used as a
#    fallback, allowing migration from the PropertyFileSnitch.
#  - PropertyFileSnitch:
#    Proximity is determined by rack and data center, which are
#    explicitly configured in cassandra-topology.properties.
#  - Ec2Snitch:
#    Appropriate for EC2 deployments in a single Region. Loads Region
#    and Availability Zone information from the EC2 API. The Region is
#    treated as the datacenter, and the Availability Zone as the rack.
#    Only private IPs are used, so this will not work across multiple
#    Regions.
#  - Ec2MultiRegionSnitch:
#    Uses public IPs as broadcast_address to allow cross-region
#    connectivity.  (Thus, you should set seed addresses to the public
#    IP as well.) You will need to open the storage_port or
#    ssl_storage_port on the public IP firewall.  (For intra-Region
#    traffic, Scylla will switch to the private IP after
#    establishing a connection.)
#  - RackInferringSnitch:
#    Proximity is determined by rack and data center, which are
#    assumed to correspond to the 3rd and 2nd octet of each node's IP
#    address, respectively.  Unless this happens to match your
#    deployment conventions, this is best used as an example of
#    writing a custom Snitch class and is provided in that spirit.
#
# You can use a custom Snitch by setting this to the full class name
# of the snitch, which will be assumed to be on your classpath.
endpoint_snitch: GossipingPropertyFileSnitch

# The address or interface to bind the Thrift RPC service and native transport
# server to.
#
# Set rpc_address OR rpc_interface, not both. Interfaces must correspond
# to a single address, IP aliasing is not supported.
#
# Leaving rpc_address blank has the same effect as on listen_address
# (i.e. it will be based on the configured hostname of the node).
#
# Note that unlike listen_address, you can specify 0.0.0.0, but you must also
# set broadcast_rpc_address to a value other than 0.0.0.0.
#
# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
#
# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4
# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
rpc_address: localhost
# rpc_interface: eth1
# rpc_interface_prefer_ipv6: false

# port for Thrift to listen for clients on
rpc_port: 9160

# port for REST API server
api_port: 10000

# IP for the REST API server
api_address: 0.0.0.0

# Log WARN on any batch size exceeding this value. 128 kiB per batch by default.
# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
batch_size_warn_threshold_in_kb: 128

# Fail any multiple-partition batch exceeding this value. 1 MiB (8x warn threshold) by default.
batch_size_fail_threshold_in_kb: 1024

# Authentication backend, identifying users
# Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthenticator,
# PasswordAuthenticator}.
#
# - AllowAllAuthenticator performs no checks - set it to disable authentication.
# - PasswordAuthenticator relies on username/password pairs to authenticate
#   users. It keeps usernames and hashed passwords in system_auth.credentials table.
#   Please increase system_auth keyspace replication factor if you use this authenticator.
# - com.scylladb.auth.TransitionalAuthenticator requires username/password pair
#   to authenticate in the same manner as PasswordAuthenticator, but improper credentials
#   result in being logged in as an anonymous user. Use for upgrading clusters' auth.
authenticator: AllowAllAuthenticator

# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
# Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthorizer,
# CassandraAuthorizer}.
#
# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
# - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
#   increase system_auth keyspace replication factor if you use this authorizer.
# - com.scylladb.auth.TransitionalAuthorizer wraps around the CassandraAuthorizer, using it for
#   authorizing permission management. Otherwise, it allows all. Use for upgrading
#   clusters' auth.
authorizer: AllowAllAuthorizer

# initial_token allows you to specify tokens manually.  While you can use # it with
# vnodes (num_tokens > 1, above) -- in which case you should provide a 
# comma-separated list -- it's primarily used when adding nodes # to legacy clusters 
# that do not have vnodes enabled.
# initial_token:

# RPC address to broadcast to drivers and other Scylla nodes. This cannot
# be set to 0.0.0.0. If left blank, this will be set to the value of
# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
# be set.
# broadcast_rpc_address: 1.2.3.4

# Uncomment to enable experimental features
# experimental_features:
#     - udf
#     - alternator-streams
#     - alternator-ttl
#     - raft

# The directory where hints files are stored if hinted handoff is enabled.
# hints_directory: /var/lib/scylla/hints
 
# The directory where hints files are stored for materialized-view updates
# view_hints_directory: /var/lib/scylla/view_hints

# See https://docs.scylladb.com/architecture/anti-entropy/hinted-handoff
# May either be "true" or "false" to enable globally, or contain a list
# of data centers to enable per-datacenter.
# hinted_handoff_enabled: DC1,DC2
# hinted_handoff_enabled: true

# this defines the maximum amount of time a dead host will have hints
# generated.  After it has been dead this long, new hints for it will not be
# created until it has been seen alive and gone down again.
# max_hint_window_in_ms: 10800000 # 3 hours


# Validity period for permissions cache (fetching permissions can be an
# expensive operation depending on the authorizer, CassandraAuthorizer is
# one example). Defaults to 10000, set to 0 to disable.
# Will be disabled automatically for AllowAllAuthorizer.
# permissions_validity_in_ms: 10000

# Refresh interval for permissions cache (if enabled).
# After this interval, cache entries become eligible for refresh. Upon next
# access, an async reload is scheduled and the old value returned until it
# completes. If permissions_validity_in_ms is non-zero, then this also must have
# a non-zero value. Defaults to 2000. It's recommended to set this value to
# be at least 3 times smaller than the permissions_validity_in_ms.
# permissions_update_interval_in_ms: 2000

# The partitioner is responsible for distributing groups of rows (by
# partition key) across nodes in the cluster.  You should leave this
# alone for new clusters.  The partitioner can NOT be changed without
# reloading all data, so when upgrading you should set this to the
# same partitioner you were already using.
#
# Murmur3Partitioner is currently the only supported partitioner,
#
partitioner: org.apache.cassandra.dht.Murmur3Partitioner

# Total space to use for commitlogs.
#
# If space gets above this value (it will round up to the next nearest
# segment multiple), Scylla will flush every dirty CF in the oldest
# segment and remove it.  So a small total commitlog space will tend
# to cause more flush activity on less-active columnfamilies.
#
# A value of -1 (default) will automatically equate it to the total amount of memory
# available for Scylla.
commitlog_total_space_in_mb: -1

# TCP port, for commands and data
# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
storage_port: 7000

# SSL port, for encrypted communication.  Unused unless enabled in
# encryption_options
# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
# ssl_storage_port: 7001

# listen_interface: eth0
# listen_interface_prefer_ipv6: false

# Whether to start the native transport server.
# Please note that the address on which the native transport is bound is the
# same as the rpc_address. The port however is different and specified below.
# start_native_transport: true

# The maximum size of allowed frame. Frame (requests) larger than this will
# be rejected as invalid. The default is 256MB.
# native_transport_max_frame_size_in_mb: 256

# Whether to start the thrift rpc server.
start_rpc: true

# enable or disable keepalive on rpc/native connections
# rpc_keepalive: true

# Set to true to have Scylla create a hard link to each sstable
# flushed or streamed locally in a backups/ subdirectory of the
# keyspace data.  Removing these links is the operator's
# responsibility.
# incremental_backups: false

# Whether or not to take a snapshot before each compaction.  Be
# careful using this option, since Scylla won't clean up the
# snapshots for you.  Mostly useful if you're paranoid when there
# is a data format change.
# snapshot_before_compaction: false

# Whether or not a snapshot is taken of the data before keyspace truncation
# or dropping of column families. The STRONGLY advised default of true 
# should be used to provide data safety. If you set this flag to false, you will
# lose data on truncation or drop.
# auto_snapshot: true

# When executing a scan, within or across a partition, we need to keep the
# tombstones seen in memory so we can return them to the coordinator, which
# will use them to make sure other replicas also know about the deleted rows.
# With workloads that generate a lot of tombstones, this can cause performance
# problems and even exaust the server heap.
# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
# Adjust the thresholds here if you understand the dangers and want to
# scan more tombstones anyway.  These thresholds may also be adjusted at runtime
# using the StorageService mbean.
# tombstone_warn_threshold: 1000
# tombstone_failure_threshold: 100000

# Granularity of the collation index of rows within a partition.
# Increase if your rows are large, or if you have a very large
# number of rows per partition.  The competing goals are these:
#   1) a smaller granularity means more index entries are generated
#      and looking up rows withing the partition by collation column
#      is faster
#   2) but, Scylla will keep the collation index in memory for hot
#      rows (as part of the key cache), so a larger granularity means
#      you can cache more hot rows
# column_index_size_in_kb: 64

# Auto-scaling of the promoted index prevents running out of memory
# when the promoted index grows too large (due to partitions with many rows
# vs. too small column_index_size_in_kb).  When the serialized representation
# of the promoted index grows by this threshold, the desired block size
# for this partition (initialized to column_index_size_in_kb)
# is doubled, to decrease the sampling resolution by half.
#
# To disable promoted index auto-scaling, set the threshold to 0.
# column_index_auto_scale_threshold_in_kb: 10240

# Log a warning when writing partitions larger than this value
# compaction_large_partition_warning_threshold_mb: 1000

# Log a warning when writing rows larger than this value
# compaction_large_row_warning_threshold_mb: 10

# Log a warning when writing cells larger than this value
# compaction_large_cell_warning_threshold_mb: 1

# Log a warning when row number is larger than this value
# compaction_rows_count_warning_threshold: 100000

# How long the coordinator should wait for seq or index scans to complete
# range_request_timeout_in_ms: 10000
# How long the coordinator should wait for writes to complete
# counter_write_request_timeout_in_ms: 5000
# How long a coordinator should continue to retry a CAS operation
# that contends with other proposals for the same row
# cas_contention_timeout_in_ms: 1000
# How long the coordinator should wait for truncates to complete
# (This can be much longer, because unless auto_snapshot is disabled
# we need to flush first so we can snapshot before removing the data.)
# truncate_request_timeout_in_ms: 60000
# The default timeout for other, miscellaneous operations
# request_timeout_in_ms: 10000

# Enable or disable inter-node encryption. 
# You must also generate keys and provide the appropriate key and trust store locations and passwords. 
#
# The available internode options are : all, none, dc, rack
# If set to dc scylla  will encrypt the traffic between the DCs
# If set to rack scylla  will encrypt the traffic between the racks
#
# SSL/TLS algorithm and ciphers used can be controlled by 
# the priority_string parameter. Info on priority string
# syntax and values is available at:
#   https://gnutls.org/manual/html_node/Priority-Strings.html
#
# The require_client_auth parameter allows you to 
# restrict access to service based on certificate 
# validation. Client must provide a certificate 
# accepted by the used trust store to connect.
# 
# server_encryption_options:
#    internode_encryption: none
#    certificate: conf/scylla.crt
#    keyfile: conf/scylla.key
#    truststore: <none, use system trust>
#    certficate_revocation_list: <none>
#    require_client_auth: False
#    priority_string: <none, use default>

# enable or disable client/server encryption.
# client_encryption_options:
#    enabled: false
#    certificate: conf/scylla.crt
#    keyfile: conf/scylla.key
#    truststore: <none, use system trust>
#    certficate_revocation_list: <none>
#    require_client_auth: False
#    priority_string: <none, use default>

# internode_compression controls whether traffic between nodes is
# compressed.
# can be:  all  - all traffic is compressed
#          dc   - traffic between different datacenters is compressed
#          none - nothing is compressed.
# internode_compression: none

# Enable or disable tcp_nodelay for inter-dc communication.
# Disabling it will result in larger (but fewer) network packets being sent,
# reducing overhead from the TCP protocol itself, at the cost of increasing
# latency if you block for cross-datacenter responses.
# inter_dc_tcp_nodelay: false

# Relaxation of environment checks.
#
# Scylla places certain requirements on its environment.  If these requirements are
# not met, performance and reliability can be degraded.
#
# These requirements include:
#    - A filesystem with good support for aysnchronous I/O (AIO). Currently,
#      this means XFS.
#
# false: strict environment checks are in place; do not start if they are not met.
# true: relaxed environment checks; performance and reliability may degraade.
#
developer_mode: true


# Idle-time background processing
#
# Scylla can perform certain jobs in the background while the system is otherwise idle,
# freeing processor resources when there is other work to be done.
#
# defragment_memory_on_idle: true
#
# prometheus port
# By default, Scylla opens prometheus API port on port 9180
# setting the port to 0 will disable the prometheus API.
# prometheus_port: 9180
#
# prometheus address
# Leaving this blank will set it to the same value as listen_address.
# This means that by default, Scylla listens to the prometheus API on the same
# listening address (and therefore network interface) used to listen for
# internal communication. If the monitoring node is not in this internal
# network, you can override prometheus_address explicitly - e.g., setting
# it to 0.0.0.0 to listen on all interfaces.
# prometheus_address: 1.2.3.4

# Distribution of data among cores (shards) within a node
#
# Scylla distributes data within a node among shards, using a round-robin
# strategy:
#  [shard0] [shard1] ... [shardN-1] [shard0] [shard1] ... [shardN-1] ...
#
# Scylla versions 1.6 and below used just one repetition of the pattern;
# this intefered with data placement among nodes (vnodes).
#
# Scylla versions 1.7 and above use 4096 repetitions of the pattern; this
# provides for better data distribution.
#
# the value below is log (base 2) of the number of repetitions.
#
# Set to 0 to avoid rewriting all data when upgrading from Scylla 1.6 and
# below.
#
# Keep at 12 for new clusters.
murmur3_partitioner_ignore_msb_bits: 12

# Bypass in-memory data cache (the row cache) when performing reversed queries.
# reversed_reads_auto_bypass_cache: false

# Use a new optimized algorithm for performing reversed reads.
# Set to `false` to fall-back to the old algorithm.
# enable_optimized_reversed_reads: true

# Use on a new, parallel algorithm for performing aggregate queries.
# Set to `false` to fall-back to the old algorithm.
# enable_parallelized_aggregation: true