Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 
 

2156 rader
52 KiB

  1. package redis
  2. import (
  3. "context"
  4. "crypto/tls"
  5. "fmt"
  6. "math"
  7. "net"
  8. "net/url"
  9. "runtime"
  10. "sort"
  11. "strings"
  12. "sync"
  13. "sync/atomic"
  14. "time"
  15. "github.com/redis/go-redis/v9/auth"
  16. "github.com/redis/go-redis/v9/internal"
  17. "github.com/redis/go-redis/v9/internal/hashtag"
  18. "github.com/redis/go-redis/v9/internal/pool"
  19. "github.com/redis/go-redis/v9/internal/proto"
  20. "github.com/redis/go-redis/v9/internal/rand"
  21. "github.com/redis/go-redis/v9/maintnotifications"
  22. "github.com/redis/go-redis/v9/push"
  23. )
  // minLatencyMeasurementInterval is the minimum age a node's last latency
// measurement must have before clusterNodes.GC schedules a new probe.
const minLatencyMeasurementInterval = 10 * time.Second

// errClusterNoNodes is returned when no node address is available.
var errClusterNoNodes = fmt.Errorf("redis: cluster has no nodes")
  // ClusterOptions are used to configure a cluster client and should be
// passed to NewClusterClient.
//
// Zero values of several fields are replaced with defaults by init, and for
// some numeric/duration fields the value -1 is normalized to 0 there; see the
// per-field notes below.
type ClusterOptions struct {
	// A seed list of host:port addresses of cluster nodes.
	Addrs []string

	// ClientName will execute the `CLIENT SETNAME ClientName` command for each conn.
	ClientName string

	// NewClient creates a cluster node client with provided name and options.
	// If NewClient is set by the user, the user is responsible for handling maintnotifications upgrades and push notifications.
	// When left nil, init sets it to the package-level NewClient.
	NewClient func(opt *Options) *Client

	// The maximum number of retries before giving up. Command is retried
	// on network errors and MOVED/ASK redirects.
	// Default is 3 retries. A value of -1 is normalized to 0 by init.
	MaxRedirects int

	// Enables read-only commands on slave nodes.
	ReadOnly bool
	// Allows routing read-only commands to the closest master or slave node.
	// It automatically enables ReadOnly.
	RouteByLatency bool
	// Allows routing read-only commands to the random master or slave node.
	// It automatically enables ReadOnly.
	RouteRandomly bool

	// Optional function that returns cluster slots information.
	// It is useful to manually create cluster of standalone Redis servers
	// and load-balance read/write operations between master and slaves.
	// It can use service like ZooKeeper to maintain configuration information
	// and Cluster.ReloadState to manually trigger state reloading.
	ClusterSlots func(context.Context) ([]ClusterSlot, error)

	// Following options are copied from Options struct.

	Dialer func(ctx context.Context, network, addr string) (net.Conn, error)

	OnConnect func(ctx context.Context, cn *Conn) error

	Protocol                     int
	Username                     string
	Password                     string
	CredentialsProvider          func() (username string, password string)
	CredentialsProviderContext   func(ctx context.Context) (username string, password string, err error)
	StreamingCredentialsProvider auth.StreamingCredentialsProvider

	// MaxRetries is the maximum number of retries before giving up.
	// For ClusterClient, retries are disabled by default (set to -1),
	// because the cluster client handles all kinds of retries internally.
	// This is intentional and differs from the standalone Options default.
	MaxRetries int
	// MinRetryBackoff defaults to 8 milliseconds; -1 is normalized to 0 by init.
	MinRetryBackoff time.Duration
	// MaxRetryBackoff defaults to 512 milliseconds; -1 is normalized to 0 by init.
	MaxRetryBackoff time.Duration

	DialTimeout time.Duration
	// ReadTimeout defaults to 3 seconds; -1 is normalized to 0 by init.
	ReadTimeout time.Duration
	// WriteTimeout defaults to the (already normalized) ReadTimeout;
	// -1 is normalized to 0 by init.
	WriteTimeout          time.Duration
	ContextTimeoutEnabled bool

	PoolFIFO        bool
	PoolSize        int // applies per cluster node and not for the whole cluster
	PoolTimeout     time.Duration
	MinIdleConns    int
	MaxIdleConns    int
	MaxActiveConns  int // applies per cluster node and not for the whole cluster
	ConnMaxIdleTime time.Duration
	ConnMaxLifetime time.Duration

	// ReadBufferSize is the size of the bufio.Reader buffer for each connection.
	// Larger buffers can improve performance for commands that return large responses.
	// Smaller buffers can improve memory usage for larger pools.
	//
	// default: 32KiB (32768 bytes)
	ReadBufferSize int

	// WriteBufferSize is the size of the bufio.Writer buffer for each connection.
	// Larger buffers can improve performance for large pipelines and commands with many arguments.
	// Smaller buffers can improve memory usage for larger pools.
	//
	// default: 32KiB (32768 bytes)
	WriteBufferSize int

	TLSConfig *tls.Config

	// DisableIndentity - Disable set-lib on connect.
	//
	// default: false
	//
	// Deprecated: Use DisableIdentity instead.
	DisableIndentity bool

	// DisableIdentity is used to disable CLIENT SETINFO command on connect.
	//
	// default: false
	DisableIdentity bool

	IdentitySuffix string // Add suffix to client name. Default is empty.

	// UnstableResp3 enables Unstable mode for Redis Search module with RESP3.
	UnstableResp3 bool

	// PushNotificationProcessor is the processor for handling push notifications.
	// If nil, a default processor will be created for RESP3 connections.
	PushNotificationProcessor push.NotificationProcessor

	// FailingTimeoutSeconds is the timeout in seconds for marking a cluster node as failing.
	// When a node is marked as failing, it will be avoided for this duration.
	// Default is 15 seconds.
	FailingTimeoutSeconds int

	// MaintNotificationsConfig provides custom configuration for maintnotifications upgrades.
	// When MaintNotificationsConfig.Mode is not "disabled", the client will handle
	// cluster upgrade notifications gracefully and manage connection/pool state
	// transitions seamlessly. Requires Protocol: 3 (RESP3) for push notifications.
	// If nil, maintnotifications upgrades are in "auto" mode and will be enabled if the server supports it.
	// The ClusterClient does not directly work with maintnotifications, it is up to the clients in the Nodes map to work with maintnotifications.
	MaintNotificationsConfig *maintnotifications.Config
}
  125. func (opt *ClusterOptions) init() {
  126. switch opt.MaxRedirects {
  127. case -1:
  128. opt.MaxRedirects = 0
  129. case 0:
  130. opt.MaxRedirects = 3
  131. }
  132. if opt.RouteByLatency || opt.RouteRandomly {
  133. opt.ReadOnly = true
  134. }
  135. if opt.PoolSize == 0 {
  136. opt.PoolSize = 5 * runtime.GOMAXPROCS(0)
  137. }
  138. if opt.ReadBufferSize == 0 {
  139. opt.ReadBufferSize = proto.DefaultBufferSize
  140. }
  141. if opt.WriteBufferSize == 0 {
  142. opt.WriteBufferSize = proto.DefaultBufferSize
  143. }
  144. switch opt.ReadTimeout {
  145. case -1:
  146. opt.ReadTimeout = 0
  147. case 0:
  148. opt.ReadTimeout = 3 * time.Second
  149. }
  150. switch opt.WriteTimeout {
  151. case -1:
  152. opt.WriteTimeout = 0
  153. case 0:
  154. opt.WriteTimeout = opt.ReadTimeout
  155. }
  156. if opt.MaxRetries == 0 {
  157. opt.MaxRetries = -1
  158. }
  159. switch opt.MinRetryBackoff {
  160. case -1:
  161. opt.MinRetryBackoff = 0
  162. case 0:
  163. opt.MinRetryBackoff = 8 * time.Millisecond
  164. }
  165. switch opt.MaxRetryBackoff {
  166. case -1:
  167. opt.MaxRetryBackoff = 0
  168. case 0:
  169. opt.MaxRetryBackoff = 512 * time.Millisecond
  170. }
  171. if opt.NewClient == nil {
  172. opt.NewClient = NewClient
  173. }
  174. if opt.FailingTimeoutSeconds == 0 {
  175. opt.FailingTimeoutSeconds = 15
  176. }
  177. }
  178. // ParseClusterURL parses a URL into ClusterOptions that can be used to connect to Redis.
  179. // The URL must be in the form:
  180. //
  181. // redis://<user>:<password>@<host>:<port>
  182. // or
  183. // rediss://<user>:<password>@<host>:<port>
  184. //
  185. // To add additional addresses, specify the query parameter, "addr" one or more times. e.g:
  186. //
  187. // redis://<user>:<password>@<host>:<port>?addr=<host2>:<port2>&addr=<host3>:<port3>
  188. // or
  189. // rediss://<user>:<password>@<host>:<port>?addr=<host2>:<port2>&addr=<host3>:<port3>
  190. //
  191. // Most Option fields can be set using query parameters, with the following restrictions:
  192. // - field names are mapped using snake-case conversion: to set MaxRetries, use max_retries
  193. // - only scalar type fields are supported (bool, int, time.Duration)
  194. // - for time.Duration fields, values must be a valid input for time.ParseDuration();
  195. // additionally a plain integer as value (i.e. without unit) is interpreted as seconds
  196. // - to disable a duration field, use value less than or equal to 0; to use the default
  197. // value, leave the value blank or remove the parameter
  198. // - only the last value is interpreted if a parameter is given multiple times
  199. // - fields "network", "addr", "username" and "password" can only be set using other
  200. // URL attributes (scheme, host, userinfo, resp.), query parameters using these
  201. // names will be treated as unknown parameters
  202. // - unknown parameter names will result in an error
  203. //
  204. // Example:
  205. //
  206. // redis://user:password@localhost:6789?dial_timeout=3&read_timeout=6s&addr=localhost:6790&addr=localhost:6791
  207. // is equivalent to:
  208. // &ClusterOptions{
  209. // Addr: ["localhost:6789", "localhost:6790", "localhost:6791"]
  210. // DialTimeout: 3 * time.Second, // no time unit = seconds
  211. // ReadTimeout: 6 * time.Second,
  212. // }
  213. func ParseClusterURL(redisURL string) (*ClusterOptions, error) {
  214. o := &ClusterOptions{}
  215. u, err := url.Parse(redisURL)
  216. if err != nil {
  217. return nil, err
  218. }
  219. // add base URL to the array of addresses
  220. // more addresses may be added through the URL params
  221. h, p := getHostPortWithDefaults(u)
  222. o.Addrs = append(o.Addrs, net.JoinHostPort(h, p))
  223. // setup username, password, and other configurations
  224. o, err = setupClusterConn(u, h, o)
  225. if err != nil {
  226. return nil, err
  227. }
  228. return o, nil
  229. }
  230. // setupClusterConn gets the username and password from the URL and the query parameters.
  231. func setupClusterConn(u *url.URL, host string, o *ClusterOptions) (*ClusterOptions, error) {
  232. switch u.Scheme {
  233. case "rediss":
  234. o.TLSConfig = &tls.Config{ServerName: host}
  235. fallthrough
  236. case "redis":
  237. o.Username, o.Password = getUserPassword(u)
  238. default:
  239. return nil, fmt.Errorf("redis: invalid URL scheme: %s", u.Scheme)
  240. }
  241. // retrieve the configuration from the query parameters
  242. o, err := setupClusterQueryParams(u, o)
  243. if err != nil {
  244. return nil, err
  245. }
  246. return o, nil
  247. }
  248. // setupClusterQueryParams converts query parameters in u to option value in o.
  249. func setupClusterQueryParams(u *url.URL, o *ClusterOptions) (*ClusterOptions, error) {
  250. q := queryOptions{q: u.Query()}
  251. o.Protocol = q.int("protocol")
  252. o.ClientName = q.string("client_name")
  253. o.MaxRedirects = q.int("max_redirects")
  254. o.ReadOnly = q.bool("read_only")
  255. o.RouteByLatency = q.bool("route_by_latency")
  256. o.RouteRandomly = q.bool("route_randomly")
  257. o.MaxRetries = q.int("max_retries")
  258. o.MinRetryBackoff = q.duration("min_retry_backoff")
  259. o.MaxRetryBackoff = q.duration("max_retry_backoff")
  260. o.DialTimeout = q.duration("dial_timeout")
  261. o.ReadTimeout = q.duration("read_timeout")
  262. o.WriteTimeout = q.duration("write_timeout")
  263. o.PoolFIFO = q.bool("pool_fifo")
  264. o.PoolSize = q.int("pool_size")
  265. o.MinIdleConns = q.int("min_idle_conns")
  266. o.MaxIdleConns = q.int("max_idle_conns")
  267. o.MaxActiveConns = q.int("max_active_conns")
  268. o.PoolTimeout = q.duration("pool_timeout")
  269. o.ConnMaxLifetime = q.duration("conn_max_lifetime")
  270. o.ConnMaxIdleTime = q.duration("conn_max_idle_time")
  271. o.FailingTimeoutSeconds = q.int("failing_timeout_seconds")
  272. if q.err != nil {
  273. return nil, q.err
  274. }
  275. // addr can be specified as many times as needed
  276. addrs := q.strings("addr")
  277. for _, addr := range addrs {
  278. h, p, err := net.SplitHostPort(addr)
  279. if err != nil || h == "" || p == "" {
  280. return nil, fmt.Errorf("redis: unable to parse addr param: %s", addr)
  281. }
  282. o.Addrs = append(o.Addrs, net.JoinHostPort(h, p))
  283. }
  284. // any parameters left?
  285. if r := q.remaining(); len(r) > 0 {
  286. return nil, fmt.Errorf("redis: unexpected option: %s", strings.Join(r, ", "))
  287. }
  288. return o, nil
  289. }
  290. func (opt *ClusterOptions) clientOptions() *Options {
  291. // Clone MaintNotificationsConfig to avoid sharing between cluster node clients
  292. var maintNotificationsConfig *maintnotifications.Config
  293. if opt.MaintNotificationsConfig != nil {
  294. configClone := *opt.MaintNotificationsConfig
  295. maintNotificationsConfig = &configClone
  296. }
  297. return &Options{
  298. ClientName: opt.ClientName,
  299. Dialer: opt.Dialer,
  300. OnConnect: opt.OnConnect,
  301. Protocol: opt.Protocol,
  302. Username: opt.Username,
  303. Password: opt.Password,
  304. CredentialsProvider: opt.CredentialsProvider,
  305. CredentialsProviderContext: opt.CredentialsProviderContext,
  306. StreamingCredentialsProvider: opt.StreamingCredentialsProvider,
  307. MaxRetries: opt.MaxRetries,
  308. MinRetryBackoff: opt.MinRetryBackoff,
  309. MaxRetryBackoff: opt.MaxRetryBackoff,
  310. DialTimeout: opt.DialTimeout,
  311. ReadTimeout: opt.ReadTimeout,
  312. WriteTimeout: opt.WriteTimeout,
  313. ContextTimeoutEnabled: opt.ContextTimeoutEnabled,
  314. PoolFIFO: opt.PoolFIFO,
  315. PoolSize: opt.PoolSize,
  316. PoolTimeout: opt.PoolTimeout,
  317. MinIdleConns: opt.MinIdleConns,
  318. MaxIdleConns: opt.MaxIdleConns,
  319. MaxActiveConns: opt.MaxActiveConns,
  320. ConnMaxIdleTime: opt.ConnMaxIdleTime,
  321. ConnMaxLifetime: opt.ConnMaxLifetime,
  322. ReadBufferSize: opt.ReadBufferSize,
  323. WriteBufferSize: opt.WriteBufferSize,
  324. DisableIdentity: opt.DisableIdentity,
  325. DisableIndentity: opt.DisableIdentity,
  326. IdentitySuffix: opt.IdentitySuffix,
  327. FailingTimeoutSeconds: opt.FailingTimeoutSeconds,
  328. TLSConfig: opt.TLSConfig,
  329. // If ClusterSlots is populated, then we probably have an artificial
  330. // cluster whose nodes are not in clustering mode (otherwise there isn't
  331. // much use for ClusterSlots config). This means we cannot execute the
  332. // READONLY command against that node -- setting readOnly to false in such
  333. // situations in the options below will prevent that from happening.
  334. readOnly: opt.ReadOnly && opt.ClusterSlots == nil,
  335. UnstableResp3: opt.UnstableResp3,
  336. MaintNotificationsConfig: maintNotificationsConfig,
  337. PushNotificationProcessor: opt.PushNotificationProcessor,
  338. }
  339. }
  340. //------------------------------------------------------------------------------
  // clusterNode couples the client for one cluster node with the per-node
// routing and health counters. The numeric fields are accessed through
// sync/atomic.
type clusterNode struct {
	Client *Client

	// latency is the measured latency in microseconds. newClusterNode seeds it
	// with math.MaxUint32 until updateLatency stores a measurement.
	latency uint32 // atomic
	// generation is the highest generation recorded through SetGeneration.
	generation uint32 // atomic
	// failing is 0 while the node is not marked; otherwise it holds the Unix
	// time in seconds (truncated to uint32) of the last MarkAsFailing call.
	failing uint32 // atomic
	// loaded is set to 1 once Loading saw a ping that did not fail with a
	// loading error; MarkAsFailing resets it to 0.
	loaded uint32 // atomic
	// last time the latency measurement was performed for the node, stored in nanoseconds from epoch
	lastLatencyMeasurement int64 // atomic
}
  350. func newClusterNode(clOpt *ClusterOptions, addr string) *clusterNode {
  351. opt := clOpt.clientOptions()
  352. opt.Addr = addr
  353. node := clusterNode{
  354. Client: clOpt.NewClient(opt),
  355. }
  356. node.latency = math.MaxUint32
  357. if clOpt.RouteByLatency {
  358. go node.updateLatency()
  359. }
  360. return &node
  361. }
  362. func (n *clusterNode) String() string {
  363. return n.Client.String()
  364. }
  365. func (n *clusterNode) Close() error {
  366. return n.Client.Close()
  367. }
  368. const maximumNodeLatency = 1 * time.Minute
  369. func (n *clusterNode) updateLatency() {
  370. const numProbe = 10
  371. var dur uint64
  372. successes := 0
  373. for i := 0; i < numProbe; i++ {
  374. time.Sleep(time.Duration(10+rand.Intn(10)) * time.Millisecond)
  375. start := time.Now()
  376. err := n.Client.Ping(context.TODO()).Err()
  377. if err == nil {
  378. dur += uint64(time.Since(start) / time.Microsecond)
  379. successes++
  380. }
  381. }
  382. var latency float64
  383. if successes == 0 {
  384. // If none of the pings worked, set latency to some arbitrarily high value so this node gets
  385. // least priority.
  386. latency = float64((maximumNodeLatency) / time.Microsecond)
  387. } else {
  388. latency = float64(dur) / float64(successes)
  389. }
  390. atomic.StoreUint32(&n.latency, uint32(latency+0.5))
  391. n.SetLastLatencyMeasurement(time.Now())
  392. }
  393. func (n *clusterNode) Latency() time.Duration {
  394. latency := atomic.LoadUint32(&n.latency)
  395. return time.Duration(latency) * time.Microsecond
  396. }
  397. func (n *clusterNode) MarkAsFailing() {
  398. atomic.StoreUint32(&n.failing, uint32(time.Now().Unix()))
  399. atomic.StoreUint32(&n.loaded, 0)
  400. }
  401. func (n *clusterNode) Failing() bool {
  402. timeout := int64(n.Client.opt.FailingTimeoutSeconds)
  403. failing := atomic.LoadUint32(&n.failing)
  404. if failing == 0 {
  405. return false
  406. }
  407. if time.Now().Unix()-int64(failing) < timeout {
  408. return true
  409. }
  410. atomic.StoreUint32(&n.failing, 0)
  411. return false
  412. }
  413. func (n *clusterNode) Generation() uint32 {
  414. return atomic.LoadUint32(&n.generation)
  415. }
  416. func (n *clusterNode) LastLatencyMeasurement() int64 {
  417. return atomic.LoadInt64(&n.lastLatencyMeasurement)
  418. }
  419. func (n *clusterNode) SetGeneration(gen uint32) {
  420. for {
  421. v := atomic.LoadUint32(&n.generation)
  422. if gen < v || atomic.CompareAndSwapUint32(&n.generation, v, gen) {
  423. break
  424. }
  425. }
  426. }
  427. func (n *clusterNode) SetLastLatencyMeasurement(t time.Time) {
  428. for {
  429. v := atomic.LoadInt64(&n.lastLatencyMeasurement)
  430. if t.UnixNano() < v || atomic.CompareAndSwapInt64(&n.lastLatencyMeasurement, v, t.UnixNano()) {
  431. break
  432. }
  433. }
  434. }
  435. func (n *clusterNode) Loading() bool {
  436. loaded := atomic.LoadUint32(&n.loaded)
  437. if loaded == 1 {
  438. return false
  439. }
  440. // check if the node is loading
  441. ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
  442. defer cancel()
  443. err := n.Client.Ping(ctx).Err()
  444. loading := err != nil && isLoadingError(err)
  445. if !loading {
  446. atomic.StoreUint32(&n.loaded, 1)
  447. }
  448. return loading
  449. }
  450. //------------------------------------------------------------------------------
  // clusterNodes is the registry of node clients known to a cluster client.
type clusterNodes struct {
	opt *ClusterOptions

	// mu is held by the methods in this file whenever they touch addrs,
	// nodes, activeAddrs, closed or onNewNode.
	mu sync.RWMutex
	// addrs starts as the configured seed list; GetOrCreate adds every
	// address it creates a node for.
	addrs []string
	// nodes maps an address to its node; Close sets it to nil.
	nodes map[string]*clusterNode
	// activeAddrs is rebuilt by GC from the nodes that survive collection and
	// is preferred over addrs by Addrs when non-empty.
	activeAddrs []string
	closed      bool
	// onNewNode callbacks are invoked with the client of every newly created node.
	onNewNode []func(rdb *Client)

	generation uint32 // atomic
}
  461. func newClusterNodes(opt *ClusterOptions) *clusterNodes {
  462. return &clusterNodes{
  463. opt: opt,
  464. addrs: opt.Addrs,
  465. nodes: make(map[string]*clusterNode),
  466. }
  467. }
  468. func (c *clusterNodes) Close() error {
  469. c.mu.Lock()
  470. defer c.mu.Unlock()
  471. if c.closed {
  472. return nil
  473. }
  474. c.closed = true
  475. var firstErr error
  476. for _, node := range c.nodes {
  477. if err := node.Client.Close(); err != nil && firstErr == nil {
  478. firstErr = err
  479. }
  480. }
  481. c.nodes = nil
  482. c.activeAddrs = nil
  483. return firstErr
  484. }
  485. func (c *clusterNodes) OnNewNode(fn func(rdb *Client)) {
  486. c.mu.Lock()
  487. c.onNewNode = append(c.onNewNode, fn)
  488. c.mu.Unlock()
  489. }
  490. func (c *clusterNodes) Addrs() ([]string, error) {
  491. var addrs []string
  492. c.mu.RLock()
  493. closed := c.closed //nolint:ifshort
  494. if !closed {
  495. if len(c.activeAddrs) > 0 {
  496. addrs = make([]string, len(c.activeAddrs))
  497. copy(addrs, c.activeAddrs)
  498. } else {
  499. addrs = make([]string, len(c.addrs))
  500. copy(addrs, c.addrs)
  501. }
  502. }
  503. c.mu.RUnlock()
  504. if closed {
  505. return nil, pool.ErrClosed
  506. }
  507. if len(addrs) == 0 {
  508. return nil, errClusterNoNodes
  509. }
  510. return addrs, nil
  511. }
  512. func (c *clusterNodes) NextGeneration() uint32 {
  513. return atomic.AddUint32(&c.generation, 1)
  514. }
  515. // GC removes unused nodes.
  516. func (c *clusterNodes) GC(generation uint32) {
  517. var collected []*clusterNode
  518. c.mu.Lock()
  519. c.activeAddrs = c.activeAddrs[:0]
  520. now := time.Now()
  521. for addr, node := range c.nodes {
  522. if node.Generation() >= generation {
  523. c.activeAddrs = append(c.activeAddrs, addr)
  524. if c.opt.RouteByLatency && node.LastLatencyMeasurement() < now.Add(-minLatencyMeasurementInterval).UnixNano() {
  525. go node.updateLatency()
  526. }
  527. continue
  528. }
  529. delete(c.nodes, addr)
  530. collected = append(collected, node)
  531. }
  532. c.mu.Unlock()
  533. for _, node := range collected {
  534. _ = node.Client.Close()
  535. }
  536. }
  537. func (c *clusterNodes) GetOrCreate(addr string) (*clusterNode, error) {
  538. node, err := c.get(addr)
  539. if err != nil {
  540. return nil, err
  541. }
  542. if node != nil {
  543. return node, nil
  544. }
  545. c.mu.Lock()
  546. defer c.mu.Unlock()
  547. if c.closed {
  548. return nil, pool.ErrClosed
  549. }
  550. node, ok := c.nodes[addr]
  551. if ok {
  552. return node, nil
  553. }
  554. node = newClusterNode(c.opt, addr)
  555. for _, fn := range c.onNewNode {
  556. fn(node.Client)
  557. }
  558. c.addrs = appendIfNotExist(c.addrs, addr)
  559. c.nodes[addr] = node
  560. return node, nil
  561. }
  562. func (c *clusterNodes) get(addr string) (*clusterNode, error) {
  563. c.mu.RLock()
  564. defer c.mu.RUnlock()
  565. if c.closed {
  566. return nil, pool.ErrClosed
  567. }
  568. return c.nodes[addr], nil
  569. }
  570. func (c *clusterNodes) All() ([]*clusterNode, error) {
  571. c.mu.RLock()
  572. defer c.mu.RUnlock()
  573. if c.closed {
  574. return nil, pool.ErrClosed
  575. }
  576. cp := make([]*clusterNode, 0, len(c.nodes))
  577. for _, node := range c.nodes {
  578. cp = append(cp, node)
  579. }
  580. return cp, nil
  581. }
  582. func (c *clusterNodes) Random() (*clusterNode, error) {
  583. addrs, err := c.Addrs()
  584. if err != nil {
  585. return nil, err
  586. }
  587. n := rand.Intn(len(addrs))
  588. return c.GetOrCreate(addrs[n])
  589. }
  590. //------------------------------------------------------------------------------
  // clusterSlot describes one contiguous range of hash slots together with the
// nodes serving it.
type clusterSlot struct {
	// start and end delimit the range; slotNodes treats both as inclusive.
	start int
	end   int
	// nodes holds the serving nodes; newClusterState stores the master first.
	nodes []*clusterNode
}
  596. type clusterSlotSlice []*clusterSlot
  597. func (p clusterSlotSlice) Len() int {
  598. return len(p)
  599. }
  600. func (p clusterSlotSlice) Less(i, j int) bool {
  601. return p[i].start < p[j].start
  602. }
  603. func (p clusterSlotSlice) Swap(i, j int) {
  604. p[i], p[j] = p[j], p[i]
  605. }
  // clusterState is a snapshot of the cluster topology built by
// newClusterState from a list of slot ranges.
type clusterState struct {
	nodes *clusterNodes
	// Masters collects, without duplicates, the first node of every slot range.
	Masters []*clusterNode
	// Slaves collects, without duplicates, the remaining nodes of every slot range.
	Slaves []*clusterNode

	// slots is sorted by start slot so slotNodes can binary-search it.
	slots []*clusterSlot

	// generation is the registry generation this snapshot was built with.
	generation uint32
	// createdAt is the construction time of the snapshot.
	createdAt time.Time
}
  614. func newClusterState(
  615. nodes *clusterNodes, slots []ClusterSlot, origin string,
  616. ) (*clusterState, error) {
  617. c := clusterState{
  618. nodes: nodes,
  619. slots: make([]*clusterSlot, 0, len(slots)),
  620. generation: nodes.NextGeneration(),
  621. createdAt: time.Now(),
  622. }
  623. originHost, _, _ := net.SplitHostPort(origin)
  624. isLoopbackOrigin := isLoopback(originHost)
  625. for _, slot := range slots {
  626. var nodes []*clusterNode
  627. for i, slotNode := range slot.Nodes {
  628. addr := slotNode.Addr
  629. if !isLoopbackOrigin {
  630. addr = replaceLoopbackHost(addr, originHost)
  631. }
  632. node, err := c.nodes.GetOrCreate(addr)
  633. if err != nil {
  634. return nil, err
  635. }
  636. node.SetGeneration(c.generation)
  637. nodes = append(nodes, node)
  638. if i == 0 {
  639. c.Masters = appendIfNotExist(c.Masters, node)
  640. } else {
  641. c.Slaves = appendIfNotExist(c.Slaves, node)
  642. }
  643. }
  644. c.slots = append(c.slots, &clusterSlot{
  645. start: slot.Start,
  646. end: slot.End,
  647. nodes: nodes,
  648. })
  649. }
  650. sort.Sort(clusterSlotSlice(c.slots))
  651. time.AfterFunc(time.Minute, func() {
  652. nodes.GC(c.generation)
  653. })
  654. return &c, nil
  655. }
  // replaceLoopbackHost returns nodeAddr with its host replaced by originHost
// when the host is a loopback IP address. Addresses that cannot be split into
// host and port, whose host is not an IP literal, or whose IP is not loopback
// are returned unchanged.
func replaceLoopbackHost(nodeAddr, originHost string) string {
	host, port, err := net.SplitHostPort(nodeAddr)
	if err != nil {
		return nodeAddr
	}

	if ip := net.ParseIP(host); ip == nil || !ip.IsLoopback() {
		return nodeAddr
	}

	// Keep the node's port but reach it through the origin host.
	return net.JoinHostPort(originHost, port)
}
  // isLoopback reports whether host refers to the local machine.
// IP literals are judged by net.IP.IsLoopback. Other hosts match when they
// equal "localhost" or end in ".docker.internal", compared after
// lower-casing.
func isLoopback(host string) bool {
	if ip := net.ParseIP(host); ip != nil {
		return ip.IsLoopback()
	}

	lowered := strings.ToLower(host)
	return lowered == "localhost" || strings.HasSuffix(lowered, ".docker.internal")
}
  688. func (c *clusterState) slotMasterNode(slot int) (*clusterNode, error) {
  689. nodes := c.slotNodes(slot)
  690. if len(nodes) > 0 {
  691. return nodes[0], nil
  692. }
  693. return c.nodes.Random()
  694. }
  695. func (c *clusterState) slotSlaveNode(slot int) (*clusterNode, error) {
  696. nodes := c.slotNodes(slot)
  697. switch len(nodes) {
  698. case 0:
  699. return c.nodes.Random()
  700. case 1:
  701. return nodes[0], nil
  702. case 2:
  703. slave := nodes[1]
  704. if !slave.Failing() && !slave.Loading() {
  705. return slave, nil
  706. }
  707. return nodes[0], nil
  708. default:
  709. var slave *clusterNode
  710. for i := 0; i < 10; i++ {
  711. n := rand.Intn(len(nodes)-1) + 1
  712. slave = nodes[n]
  713. if !slave.Failing() && !slave.Loading() {
  714. return slave, nil
  715. }
  716. }
  717. // All slaves are loading - use master.
  718. return nodes[0], nil
  719. }
  720. }
  721. func (c *clusterState) slotClosestNode(slot int) (*clusterNode, error) {
  722. nodes := c.slotNodes(slot)
  723. if len(nodes) == 0 {
  724. return c.nodes.Random()
  725. }
  726. var allNodesFailing = true
  727. var (
  728. closestNonFailingNode *clusterNode
  729. closestNode *clusterNode
  730. minLatency time.Duration
  731. )
  732. // setting the max possible duration as zerovalue for minlatency
  733. minLatency = time.Duration(math.MaxInt64)
  734. for _, n := range nodes {
  735. if closestNode == nil || n.Latency() < minLatency {
  736. closestNode = n
  737. minLatency = n.Latency()
  738. if !n.Failing() {
  739. closestNonFailingNode = n
  740. allNodesFailing = false
  741. }
  742. }
  743. }
  744. // pick the healthly node with the lowest latency
  745. if !allNodesFailing && closestNonFailingNode != nil {
  746. return closestNonFailingNode, nil
  747. }
  748. // if all nodes are failing, we will pick the temporarily failing node with lowest latency
  749. if minLatency < maximumNodeLatency && closestNode != nil {
  750. internal.Logger.Printf(context.TODO(), "redis: all nodes are marked as failed, picking the temporarily failing node with lowest latency")
  751. return closestNode, nil
  752. }
  753. // If all nodes are having the maximum latency(all pings are failing) - return a random node across the cluster
  754. internal.Logger.Printf(context.TODO(), "redis: pings to all nodes are failing, picking a random node across the cluster")
  755. return c.nodes.Random()
  756. }
  757. func (c *clusterState) slotRandomNode(slot int) (*clusterNode, error) {
  758. nodes := c.slotNodes(slot)
  759. if len(nodes) == 0 {
  760. return c.nodes.Random()
  761. }
  762. if len(nodes) == 1 {
  763. return nodes[0], nil
  764. }
  765. randomNodes := rand.Perm(len(nodes))
  766. for _, idx := range randomNodes {
  767. if node := nodes[idx]; !node.Failing() {
  768. return node, nil
  769. }
  770. }
  771. return nodes[randomNodes[0]], nil
  772. }
  773. func (c *clusterState) slotNodes(slot int) []*clusterNode {
  774. i := sort.Search(len(c.slots), func(i int) bool {
  775. return c.slots[i].end >= slot
  776. })
  777. if i >= len(c.slots) {
  778. return nil
  779. }
  780. x := c.slots[i]
  781. if slot >= x.start && slot <= x.end {
  782. return x.nodes
  783. }
  784. return nil
  785. }
  786. //------------------------------------------------------------------------------
  // clusterStateHolder caches the most recently loaded cluster state and
// coordinates reloading it.
type clusterStateHolder struct {
	// load produces a fresh state; it is called by Reload.
	load func(ctx context.Context) (*clusterState, error)

	// state holds the *clusterState stored by Reload.
	state atomic.Value
	// reloading is 1 while a LazyReload goroutine is running, 0 otherwise.
	reloading uint32 // atomic
}
  792. func newClusterStateHolder(fn func(ctx context.Context) (*clusterState, error)) *clusterStateHolder {
  793. return &clusterStateHolder{
  794. load: fn,
  795. }
  796. }
  797. func (c *clusterStateHolder) Reload(ctx context.Context) (*clusterState, error) {
  798. state, err := c.load(ctx)
  799. if err != nil {
  800. return nil, err
  801. }
  802. c.state.Store(state)
  803. return state, nil
  804. }
  805. func (c *clusterStateHolder) LazyReload() {
  806. if !atomic.CompareAndSwapUint32(&c.reloading, 0, 1) {
  807. return
  808. }
  809. go func() {
  810. defer atomic.StoreUint32(&c.reloading, 0)
  811. _, err := c.Reload(context.Background())
  812. if err != nil {
  813. return
  814. }
  815. time.Sleep(200 * time.Millisecond)
  816. }()
  817. }
  818. func (c *clusterStateHolder) Get(ctx context.Context) (*clusterState, error) {
  819. v := c.state.Load()
  820. if v == nil {
  821. return c.Reload(ctx)
  822. }
  823. state := v.(*clusterState)
  824. if time.Since(state.createdAt) > 10*time.Second {
  825. c.LazyReload()
  826. }
  827. return state, nil
  828. }
  829. func (c *clusterStateHolder) ReloadOrGet(ctx context.Context) (*clusterState, error) {
  830. state, err := c.Reload(ctx)
  831. if err == nil {
  832. return state, nil
  833. }
  834. return c.Get(ctx)
  835. }
  836. //------------------------------------------------------------------------------
  // ClusterClient is a Redis Cluster client representing a pool of zero
// or more underlying connections. It's safe for concurrent use by
// multiple goroutines.
type ClusterClient struct {
	// opt holds the options passed to NewClusterClient, after init has run.
	opt *ClusterOptions
	// nodes is the registry of per-node clients.
	nodes *clusterNodes
	// state caches the cluster topology and reloads it on demand.
	state         *clusterStateHolder
	cmdsInfoCache *cmdsInfoCache
	// cmdable is set to the client's Process method by NewClusterClient.
	cmdable
	hooksMixin
}
  848. // NewClusterClient returns a Redis Cluster client as described in
  849. // http://redis.io/topics/cluster-spec.
  850. func NewClusterClient(opt *ClusterOptions) *ClusterClient {
  851. if opt == nil {
  852. panic("redis: NewClusterClient nil options")
  853. }
  854. opt.init()
  855. c := &ClusterClient{
  856. opt: opt,
  857. nodes: newClusterNodes(opt),
  858. }
  859. c.state = newClusterStateHolder(c.loadState)
  860. c.cmdsInfoCache = newCmdsInfoCache(c.cmdsInfo)
  861. c.cmdable = c.Process
  862. c.initHooks(hooks{
  863. dial: nil,
  864. process: c.process,
  865. pipeline: c.processPipeline,
  866. txPipeline: c.processTxPipeline,
  867. })
  868. return c
  869. }
  870. // Options returns read-only Options that were used to create the client.
  871. func (c *ClusterClient) Options() *ClusterOptions {
  872. return c.opt
  873. }
  874. // ReloadState reloads cluster state. If available it calls ClusterSlots func
  875. // to get cluster slots information.
  876. func (c *ClusterClient) ReloadState(ctx context.Context) {
  877. c.state.LazyReload()
  878. }
  879. // Close closes the cluster client, releasing any open resources.
  880. //
  881. // It is rare to Close a ClusterClient, as the ClusterClient is meant
  882. // to be long-lived and shared between many goroutines.
  883. func (c *ClusterClient) Close() error {
  884. return c.nodes.Close()
  885. }
  886. func (c *ClusterClient) Process(ctx context.Context, cmd Cmder) error {
  887. err := c.processHook(ctx, cmd)
  888. cmd.SetErr(err)
  889. return err
  890. }
// process executes a single command against the cluster, making up to
// MaxRedirects+1 attempts. The target node is resolved from the command's
// slot via cmdNode and is re-resolved whenever node is reset to nil. It
// returns nil on the first successful attempt, the last command error once
// attempts are exhausted or the error is not retryable, or the error of a
// failed backoff sleep or node lookup.
func (c *ClusterClient) process(ctx context.Context, cmd Cmder) error {
	slot := c.cmdSlot(cmd, -1)
	var node *clusterNode
	var moved bool
	var ask bool
	var lastErr error
	for attempt := 0; attempt <= c.opt.MaxRedirects; attempt++ {
		// MOVED and ASK responses are not transient errors that require retry delay; they
		// should be attempted immediately.
		// NOTE(review): moved is only reassigned by the isMovedError call below, so
		// it keeps its previous value when a later attempt leaves through one of
		// the earlier `continue` branches - confirm that skipping the backoff in
		// that case is intended.
		if attempt > 0 && !moved && !ask {
			if err := internal.Sleep(ctx, c.retryBackoff(attempt)); err != nil {
				return err
			}
		}
		// No node selected (first attempt, or the previous one discarded it):
		// pick one for the command's slot.
		if node == nil {
			var err error
			node, err = c.cmdNode(ctx, cmd.Name(), slot)
			if err != nil {
				return err
			}
		}
		if ask {
			// After an ASK redirect, send ASKING followed by the command in one
			// pipeline on the redirect target. The flag is cleared so it applies
			// to this attempt only.
			ask = false
			pipe := node.Client.Pipeline()
			_ = pipe.Process(ctx, NewCmd(ctx, "asking"))
			_ = pipe.Process(ctx, cmd)
			_, lastErr = pipe.Exec(ctx)
		} else {
			lastErr = node.Client.Process(ctx, cmd)
		}
		// If there is no error - we are done.
		if lastErr == nil {
			return nil
		}
		// Read-only reply or closed pool: drop the node so the next attempt
		// re-resolves it; a read-only reply additionally triggers a state reload.
		if isReadOnly := isReadOnlyError(lastErr); isReadOnly || lastErr == pool.ErrClosed {
			if isReadOnly {
				c.state.LazyReload()
			}
			node = nil
			continue
		}
		// If slave is loading - pick another node.
		if c.opt.ReadOnly && isLoadingError(lastErr) {
			node.MarkAsFailing()
			node = nil
			continue
		}
		var addr string
		moved, ask, addr = isMovedError(lastErr)
		if moved || ask {
			// Redirect: refresh the state lazily and retry on the node at the
			// address carried by the redirect error.
			c.state.LazyReload()
			var err error
			node, err = c.nodes.GetOrCreate(addr)
			if err != nil {
				return err
			}
			continue
		}
		if shouldRetry(lastErr, cmd.readTimeout() == nil) {
			// First retry the same node.
			if attempt == 0 {
				continue
			}
			// Second try another node.
			node.MarkAsFailing()
			node = nil
			continue
		}
		// Not retryable.
		return lastErr
	}
	return lastErr
}
  963. func (c *ClusterClient) OnNewNode(fn func(rdb *Client)) {
  964. c.nodes.OnNewNode(fn)
  965. }
  966. // ForEachMaster concurrently calls the fn on each master node in the cluster.
  967. // It returns the first error if any.
  968. func (c *ClusterClient) ForEachMaster(
  969. ctx context.Context,
  970. fn func(ctx context.Context, client *Client) error,
  971. ) error {
  972. state, err := c.state.ReloadOrGet(ctx)
  973. if err != nil {
  974. return err
  975. }
  976. var wg sync.WaitGroup
  977. errCh := make(chan error, 1)
  978. for _, master := range state.Masters {
  979. wg.Add(1)
  980. go func(node *clusterNode) {
  981. defer wg.Done()
  982. err := fn(ctx, node.Client)
  983. if err != nil {
  984. select {
  985. case errCh <- err:
  986. default:
  987. }
  988. }
  989. }(master)
  990. }
  991. wg.Wait()
  992. select {
  993. case err := <-errCh:
  994. return err
  995. default:
  996. return nil
  997. }
  998. }
  999. // ForEachSlave concurrently calls the fn on each slave node in the cluster.
  1000. // It returns the first error if any.
  1001. func (c *ClusterClient) ForEachSlave(
  1002. ctx context.Context,
  1003. fn func(ctx context.Context, client *Client) error,
  1004. ) error {
  1005. state, err := c.state.ReloadOrGet(ctx)
  1006. if err != nil {
  1007. return err
  1008. }
  1009. var wg sync.WaitGroup
  1010. errCh := make(chan error, 1)
  1011. for _, slave := range state.Slaves {
  1012. wg.Add(1)
  1013. go func(node *clusterNode) {
  1014. defer wg.Done()
  1015. err := fn(ctx, node.Client)
  1016. if err != nil {
  1017. select {
  1018. case errCh <- err:
  1019. default:
  1020. }
  1021. }
  1022. }(slave)
  1023. }
  1024. wg.Wait()
  1025. select {
  1026. case err := <-errCh:
  1027. return err
  1028. default:
  1029. return nil
  1030. }
  1031. }
  1032. // ForEachShard concurrently calls the fn on each known node in the cluster.
  1033. // It returns the first error if any.
  1034. func (c *ClusterClient) ForEachShard(
  1035. ctx context.Context,
  1036. fn func(ctx context.Context, client *Client) error,
  1037. ) error {
  1038. state, err := c.state.ReloadOrGet(ctx)
  1039. if err != nil {
  1040. return err
  1041. }
  1042. var wg sync.WaitGroup
  1043. errCh := make(chan error, 1)
  1044. worker := func(node *clusterNode) {
  1045. defer wg.Done()
  1046. err := fn(ctx, node.Client)
  1047. if err != nil {
  1048. select {
  1049. case errCh <- err:
  1050. default:
  1051. }
  1052. }
  1053. }
  1054. for _, node := range state.Masters {
  1055. wg.Add(1)
  1056. go worker(node)
  1057. }
  1058. for _, node := range state.Slaves {
  1059. wg.Add(1)
  1060. go worker(node)
  1061. }
  1062. wg.Wait()
  1063. select {
  1064. case err := <-errCh:
  1065. return err
  1066. default:
  1067. return nil
  1068. }
  1069. }
  1070. // PoolStats returns accumulated connection pool stats.
  1071. func (c *ClusterClient) PoolStats() *PoolStats {
  1072. var acc PoolStats
  1073. state, _ := c.state.Get(context.TODO())
  1074. if state == nil {
  1075. return &acc
  1076. }
  1077. for _, node := range state.Masters {
  1078. s := node.Client.connPool.Stats()
  1079. acc.Hits += s.Hits
  1080. acc.Misses += s.Misses
  1081. acc.Timeouts += s.Timeouts
  1082. acc.TotalConns += s.TotalConns
  1083. acc.IdleConns += s.IdleConns
  1084. acc.StaleConns += s.StaleConns
  1085. }
  1086. for _, node := range state.Slaves {
  1087. s := node.Client.connPool.Stats()
  1088. acc.Hits += s.Hits
  1089. acc.Misses += s.Misses
  1090. acc.Timeouts += s.Timeouts
  1091. acc.TotalConns += s.TotalConns
  1092. acc.IdleConns += s.IdleConns
  1093. acc.StaleConns += s.StaleConns
  1094. }
  1095. return &acc
  1096. }
  1097. func (c *ClusterClient) loadState(ctx context.Context) (*clusterState, error) {
  1098. if c.opt.ClusterSlots != nil {
  1099. slots, err := c.opt.ClusterSlots(ctx)
  1100. if err != nil {
  1101. return nil, err
  1102. }
  1103. return newClusterState(c.nodes, slots, "")
  1104. }
  1105. addrs, err := c.nodes.Addrs()
  1106. if err != nil {
  1107. return nil, err
  1108. }
  1109. var firstErr error
  1110. for _, idx := range rand.Perm(len(addrs)) {
  1111. addr := addrs[idx]
  1112. node, err := c.nodes.GetOrCreate(addr)
  1113. if err != nil {
  1114. if firstErr == nil {
  1115. firstErr = err
  1116. }
  1117. continue
  1118. }
  1119. slots, err := node.Client.ClusterSlots(ctx).Result()
  1120. if err != nil {
  1121. if firstErr == nil {
  1122. firstErr = err
  1123. }
  1124. continue
  1125. }
  1126. return newClusterState(c.nodes, slots, addr)
  1127. }
  1128. /*
  1129. * No node is connectable. It's possible that all nodes' IP has changed.
  1130. * Clear activeAddrs to let client be able to re-connect using the initial
  1131. * setting of the addresses (e.g. [redis-cluster-0:6379, redis-cluster-1:6379]),
  1132. * which might have chance to resolve domain name and get updated IP address.
  1133. */
  1134. c.nodes.mu.Lock()
  1135. c.nodes.activeAddrs = nil
  1136. c.nodes.mu.Unlock()
  1137. return nil, firstErr
  1138. }
  1139. func (c *ClusterClient) Pipeline() Pipeliner {
  1140. pipe := Pipeline{
  1141. exec: pipelineExecer(c.processPipelineHook),
  1142. }
  1143. pipe.init()
  1144. return &pipe
  1145. }
  1146. func (c *ClusterClient) Pipelined(ctx context.Context, fn func(Pipeliner) error) ([]Cmder, error) {
  1147. return c.Pipeline().Pipelined(ctx, fn)
  1148. }
  1149. func (c *ClusterClient) processPipeline(ctx context.Context, cmds []Cmder) error {
  1150. cmdsMap := newCmdsMap()
  1151. if err := c.mapCmdsByNode(ctx, cmdsMap, cmds); err != nil {
  1152. setCmdsErr(cmds, err)
  1153. return err
  1154. }
  1155. for attempt := 0; attempt <= c.opt.MaxRedirects; attempt++ {
  1156. if attempt > 0 {
  1157. if err := internal.Sleep(ctx, c.retryBackoff(attempt)); err != nil {
  1158. setCmdsErr(cmds, err)
  1159. return err
  1160. }
  1161. }
  1162. failedCmds := newCmdsMap()
  1163. var wg sync.WaitGroup
  1164. for node, cmds := range cmdsMap.m {
  1165. wg.Add(1)
  1166. go func(node *clusterNode, cmds []Cmder) {
  1167. defer wg.Done()
  1168. c.processPipelineNode(ctx, node, cmds, failedCmds)
  1169. }(node, cmds)
  1170. }
  1171. wg.Wait()
  1172. if len(failedCmds.m) == 0 {
  1173. break
  1174. }
  1175. cmdsMap = failedCmds
  1176. }
  1177. return cmdsFirstErr(cmds)
  1178. }
  1179. func (c *ClusterClient) mapCmdsByNode(ctx context.Context, cmdsMap *cmdsMap, cmds []Cmder) error {
  1180. state, err := c.state.Get(ctx)
  1181. if err != nil {
  1182. return err
  1183. }
  1184. preferredRandomSlot := -1
  1185. if c.opt.ReadOnly && c.cmdsAreReadOnly(ctx, cmds) {
  1186. for _, cmd := range cmds {
  1187. slot := c.cmdSlot(cmd, preferredRandomSlot)
  1188. if preferredRandomSlot == -1 {
  1189. preferredRandomSlot = slot
  1190. }
  1191. node, err := c.slotReadOnlyNode(state, slot)
  1192. if err != nil {
  1193. return err
  1194. }
  1195. cmdsMap.Add(node, cmd)
  1196. }
  1197. return nil
  1198. }
  1199. for _, cmd := range cmds {
  1200. slot := c.cmdSlot(cmd, preferredRandomSlot)
  1201. if preferredRandomSlot == -1 {
  1202. preferredRandomSlot = slot
  1203. }
  1204. node, err := state.slotMasterNode(slot)
  1205. if err != nil {
  1206. return err
  1207. }
  1208. cmdsMap.Add(node, cmd)
  1209. }
  1210. return nil
  1211. }
  1212. func (c *ClusterClient) cmdsAreReadOnly(ctx context.Context, cmds []Cmder) bool {
  1213. for _, cmd := range cmds {
  1214. cmdInfo := c.cmdInfo(ctx, cmd.Name())
  1215. if cmdInfo == nil || !cmdInfo.ReadOnly {
  1216. return false
  1217. }
  1218. }
  1219. return true
  1220. }
  1221. func (c *ClusterClient) processPipelineNode(
  1222. ctx context.Context, node *clusterNode, cmds []Cmder, failedCmds *cmdsMap,
  1223. ) {
  1224. _ = node.Client.withProcessPipelineHook(ctx, cmds, func(ctx context.Context, cmds []Cmder) error {
  1225. cn, err := node.Client.getConn(ctx)
  1226. if err != nil {
  1227. if !isContextError(err) {
  1228. node.MarkAsFailing()
  1229. }
  1230. _ = c.mapCmdsByNode(ctx, failedCmds, cmds)
  1231. setCmdsErr(cmds, err)
  1232. return err
  1233. }
  1234. var processErr error
  1235. defer func() {
  1236. node.Client.releaseConn(ctx, cn, processErr)
  1237. }()
  1238. processErr = c.processPipelineNodeConn(ctx, node, cn, cmds, failedCmds)
  1239. return processErr
  1240. })
  1241. }
  1242. func (c *ClusterClient) processPipelineNodeConn(
  1243. ctx context.Context, node *clusterNode, cn *pool.Conn, cmds []Cmder, failedCmds *cmdsMap,
  1244. ) error {
  1245. if err := cn.WithWriter(c.context(ctx), c.opt.WriteTimeout, func(wr *proto.Writer) error {
  1246. return writeCmds(wr, cmds)
  1247. }); err != nil {
  1248. if isBadConn(err, false, node.Client.getAddr()) {
  1249. node.MarkAsFailing()
  1250. }
  1251. if shouldRetry(err, true) {
  1252. _ = c.mapCmdsByNode(ctx, failedCmds, cmds)
  1253. }
  1254. setCmdsErr(cmds, err)
  1255. return err
  1256. }
  1257. return cn.WithReader(c.context(ctx), c.opt.ReadTimeout, func(rd *proto.Reader) error {
  1258. return c.pipelineReadCmds(ctx, node, rd, cmds, failedCmds)
  1259. })
  1260. }
  1261. func (c *ClusterClient) pipelineReadCmds(
  1262. ctx context.Context,
  1263. node *clusterNode,
  1264. rd *proto.Reader,
  1265. cmds []Cmder,
  1266. failedCmds *cmdsMap,
  1267. ) error {
  1268. for i, cmd := range cmds {
  1269. err := cmd.readReply(rd)
  1270. cmd.SetErr(err)
  1271. if err == nil {
  1272. continue
  1273. }
  1274. if c.checkMovedErr(ctx, cmd, err, failedCmds) {
  1275. continue
  1276. }
  1277. if c.opt.ReadOnly && isBadConn(err, false, node.Client.getAddr()) {
  1278. node.MarkAsFailing()
  1279. }
  1280. if !isRedisError(err) {
  1281. if shouldRetry(err, true) {
  1282. _ = c.mapCmdsByNode(ctx, failedCmds, cmds)
  1283. }
  1284. setCmdsErr(cmds[i+1:], err)
  1285. return err
  1286. }
  1287. }
  1288. if err := cmds[0].Err(); err != nil && shouldRetry(err, true) {
  1289. _ = c.mapCmdsByNode(ctx, failedCmds, cmds)
  1290. return err
  1291. }
  1292. return nil
  1293. }
  1294. func (c *ClusterClient) checkMovedErr(
  1295. ctx context.Context, cmd Cmder, err error, failedCmds *cmdsMap,
  1296. ) bool {
  1297. moved, ask, addr := isMovedError(err)
  1298. if !moved && !ask {
  1299. return false
  1300. }
  1301. node, err := c.nodes.GetOrCreate(addr)
  1302. if err != nil {
  1303. return false
  1304. }
  1305. if moved {
  1306. c.state.LazyReload()
  1307. failedCmds.Add(node, cmd)
  1308. return true
  1309. }
  1310. if ask {
  1311. failedCmds.Add(node, NewCmd(ctx, "asking"), cmd)
  1312. return true
  1313. }
  1314. panic("not reached")
  1315. }
  1316. // TxPipeline acts like Pipeline, but wraps queued commands with MULTI/EXEC.
  1317. func (c *ClusterClient) TxPipeline() Pipeliner {
  1318. pipe := Pipeline{
  1319. exec: func(ctx context.Context, cmds []Cmder) error {
  1320. cmds = wrapMultiExec(ctx, cmds)
  1321. return c.processTxPipelineHook(ctx, cmds)
  1322. },
  1323. }
  1324. pipe.init()
  1325. return &pipe
  1326. }
  1327. func (c *ClusterClient) TxPipelined(ctx context.Context, fn func(Pipeliner) error) ([]Cmder, error) {
  1328. return c.TxPipeline().Pipelined(ctx, fn)
  1329. }
  1330. func (c *ClusterClient) processTxPipeline(ctx context.Context, cmds []Cmder) error {
  1331. // Trim multi .. exec.
  1332. cmds = cmds[1 : len(cmds)-1]
  1333. if len(cmds) == 0 {
  1334. return nil
  1335. }
  1336. state, err := c.state.Get(ctx)
  1337. if err != nil {
  1338. setCmdsErr(cmds, err)
  1339. return err
  1340. }
  1341. keyedCmdsBySlot := c.slottedKeyedCommands(cmds)
  1342. slot := -1
  1343. switch len(keyedCmdsBySlot) {
  1344. case 0:
  1345. slot = hashtag.RandomSlot()
  1346. case 1:
  1347. for sl := range keyedCmdsBySlot {
  1348. slot = sl
  1349. break
  1350. }
  1351. default:
  1352. // TxPipeline does not support cross slot transaction.
  1353. setCmdsErr(cmds, ErrCrossSlot)
  1354. return ErrCrossSlot
  1355. }
  1356. node, err := state.slotMasterNode(slot)
  1357. if err != nil {
  1358. setCmdsErr(cmds, err)
  1359. return err
  1360. }
  1361. cmdsMap := map[*clusterNode][]Cmder{node: cmds}
  1362. for attempt := 0; attempt <= c.opt.MaxRedirects; attempt++ {
  1363. if attempt > 0 {
  1364. if err := internal.Sleep(ctx, c.retryBackoff(attempt)); err != nil {
  1365. setCmdsErr(cmds, err)
  1366. return err
  1367. }
  1368. }
  1369. failedCmds := newCmdsMap()
  1370. var wg sync.WaitGroup
  1371. for node, cmds := range cmdsMap {
  1372. wg.Add(1)
  1373. go func(node *clusterNode, cmds []Cmder) {
  1374. defer wg.Done()
  1375. c.processTxPipelineNode(ctx, node, cmds, failedCmds)
  1376. }(node, cmds)
  1377. }
  1378. wg.Wait()
  1379. if len(failedCmds.m) == 0 {
  1380. break
  1381. }
  1382. cmdsMap = failedCmds.m
  1383. }
  1384. return cmdsFirstErr(cmds)
  1385. }
  1386. // slottedKeyedCommands returns a map of slot to commands taking into account
  1387. // only commands that have keys.
  1388. func (c *ClusterClient) slottedKeyedCommands(cmds []Cmder) map[int][]Cmder {
  1389. cmdsSlots := map[int][]Cmder{}
  1390. preferredRandomSlot := -1
  1391. for _, cmd := range cmds {
  1392. if cmdFirstKeyPos(cmd) == 0 {
  1393. continue
  1394. }
  1395. slot := c.cmdSlot(cmd, preferredRandomSlot)
  1396. if preferredRandomSlot == -1 {
  1397. preferredRandomSlot = slot
  1398. }
  1399. cmdsSlots[slot] = append(cmdsSlots[slot], cmd)
  1400. }
  1401. return cmdsSlots
  1402. }
  1403. func (c *ClusterClient) processTxPipelineNode(
  1404. ctx context.Context, node *clusterNode, cmds []Cmder, failedCmds *cmdsMap,
  1405. ) {
  1406. cmds = wrapMultiExec(ctx, cmds)
  1407. _ = node.Client.withProcessPipelineHook(ctx, cmds, func(ctx context.Context, cmds []Cmder) error {
  1408. cn, err := node.Client.getConn(ctx)
  1409. if err != nil {
  1410. _ = c.mapCmdsByNode(ctx, failedCmds, cmds)
  1411. setCmdsErr(cmds, err)
  1412. return err
  1413. }
  1414. var processErr error
  1415. defer func() {
  1416. node.Client.releaseConn(ctx, cn, processErr)
  1417. }()
  1418. processErr = c.processTxPipelineNodeConn(ctx, node, cn, cmds, failedCmds)
  1419. return processErr
  1420. })
  1421. }
  1422. func (c *ClusterClient) processTxPipelineNodeConn(
  1423. ctx context.Context, node *clusterNode, cn *pool.Conn, cmds []Cmder, failedCmds *cmdsMap,
  1424. ) error {
  1425. if err := cn.WithWriter(c.context(ctx), c.opt.WriteTimeout, func(wr *proto.Writer) error {
  1426. return writeCmds(wr, cmds)
  1427. }); err != nil {
  1428. if shouldRetry(err, true) {
  1429. _ = c.mapCmdsByNode(ctx, failedCmds, cmds)
  1430. }
  1431. setCmdsErr(cmds, err)
  1432. return err
  1433. }
  1434. return cn.WithReader(c.context(ctx), c.opt.ReadTimeout, func(rd *proto.Reader) error {
  1435. statusCmd := cmds[0].(*StatusCmd)
  1436. // Trim multi and exec.
  1437. trimmedCmds := cmds[1 : len(cmds)-1]
  1438. if err := c.txPipelineReadQueued(
  1439. ctx, node, cn, rd, statusCmd, trimmedCmds, failedCmds,
  1440. ); err != nil {
  1441. setCmdsErr(cmds, err)
  1442. moved, ask, addr := isMovedError(err)
  1443. if moved || ask {
  1444. return c.cmdsMoved(ctx, trimmedCmds, moved, ask, addr, failedCmds)
  1445. }
  1446. return err
  1447. }
  1448. return node.Client.pipelineReadCmds(ctx, cn, rd, trimmedCmds)
  1449. })
  1450. }
  1451. func (c *ClusterClient) txPipelineReadQueued(
  1452. ctx context.Context,
  1453. node *clusterNode,
  1454. cn *pool.Conn,
  1455. rd *proto.Reader,
  1456. statusCmd *StatusCmd,
  1457. cmds []Cmder,
  1458. failedCmds *cmdsMap,
  1459. ) error {
  1460. // Parse queued replies.
  1461. // To be sure there are no buffered push notifications, we process them before reading the reply
  1462. if err := node.Client.processPendingPushNotificationWithReader(ctx, cn, rd); err != nil {
  1463. // Log the error but don't fail the command execution
  1464. // Push notification processing errors shouldn't break normal Redis operations
  1465. internal.Logger.Printf(ctx, "push: error processing pending notifications before reading reply: %v", err)
  1466. }
  1467. if err := statusCmd.readReply(rd); err != nil {
  1468. return err
  1469. }
  1470. for _, cmd := range cmds {
  1471. // To be sure there are no buffered push notifications, we process them before reading the reply
  1472. if err := node.Client.processPendingPushNotificationWithReader(ctx, cn, rd); err != nil {
  1473. // Log the error but don't fail the command execution
  1474. // Push notification processing errors shouldn't break normal Redis operations
  1475. internal.Logger.Printf(ctx, "push: error processing pending notifications before reading reply: %v", err)
  1476. }
  1477. err := statusCmd.readReply(rd)
  1478. if err != nil {
  1479. if c.checkMovedErr(ctx, cmd, err, failedCmds) {
  1480. // will be processed later
  1481. continue
  1482. }
  1483. cmd.SetErr(err)
  1484. if !isRedisError(err) {
  1485. return err
  1486. }
  1487. }
  1488. }
  1489. // To be sure there are no buffered push notifications, we process them before reading the reply
  1490. if err := node.Client.processPendingPushNotificationWithReader(ctx, cn, rd); err != nil {
  1491. // Log the error but don't fail the command execution
  1492. // Push notification processing errors shouldn't break normal Redis operations
  1493. internal.Logger.Printf(ctx, "push: error processing pending notifications before reading reply: %v", err)
  1494. }
  1495. // Parse number of replies.
  1496. line, err := rd.ReadLine()
  1497. if err != nil {
  1498. if err == Nil {
  1499. err = TxFailedErr
  1500. }
  1501. return err
  1502. }
  1503. if line[0] != proto.RespArray {
  1504. return fmt.Errorf("redis: expected '*', but got line %q", line)
  1505. }
  1506. return nil
  1507. }
  1508. func (c *ClusterClient) cmdsMoved(
  1509. ctx context.Context, cmds []Cmder,
  1510. moved, ask bool,
  1511. addr string,
  1512. failedCmds *cmdsMap,
  1513. ) error {
  1514. node, err := c.nodes.GetOrCreate(addr)
  1515. if err != nil {
  1516. return err
  1517. }
  1518. if moved {
  1519. c.state.LazyReload()
  1520. for _, cmd := range cmds {
  1521. failedCmds.Add(node, cmd)
  1522. }
  1523. return nil
  1524. }
  1525. if ask {
  1526. for _, cmd := range cmds {
  1527. failedCmds.Add(node, NewCmd(ctx, "asking"), cmd)
  1528. }
  1529. return nil
  1530. }
  1531. return nil
  1532. }
  1533. func (c *ClusterClient) Watch(ctx context.Context, fn func(*Tx) error, keys ...string) error {
  1534. if len(keys) == 0 {
  1535. return fmt.Errorf("redis: Watch requires at least one key")
  1536. }
  1537. slot := hashtag.Slot(keys[0])
  1538. for _, key := range keys[1:] {
  1539. if hashtag.Slot(key) != slot {
  1540. err := fmt.Errorf("redis: Watch requires all keys to be in the same slot")
  1541. return err
  1542. }
  1543. }
  1544. node, err := c.slotMasterNode(ctx, slot)
  1545. if err != nil {
  1546. return err
  1547. }
  1548. for attempt := 0; attempt <= c.opt.MaxRedirects; attempt++ {
  1549. if attempt > 0 {
  1550. if err := internal.Sleep(ctx, c.retryBackoff(attempt)); err != nil {
  1551. return err
  1552. }
  1553. }
  1554. err = node.Client.Watch(ctx, fn, keys...)
  1555. if err == nil {
  1556. break
  1557. }
  1558. moved, ask, addr := isMovedError(err)
  1559. if moved || ask {
  1560. node, err = c.nodes.GetOrCreate(addr)
  1561. if err != nil {
  1562. return err
  1563. }
  1564. continue
  1565. }
  1566. if isReadOnly := isReadOnlyError(err); isReadOnly || err == pool.ErrClosed {
  1567. if isReadOnly {
  1568. c.state.LazyReload()
  1569. }
  1570. node, err = c.slotMasterNode(ctx, slot)
  1571. if err != nil {
  1572. return err
  1573. }
  1574. continue
  1575. }
  1576. if shouldRetry(err, true) {
  1577. continue
  1578. }
  1579. return err
  1580. }
  1581. return err
  1582. }
// pubSub builds a PubSub bound to a single cluster node that is chosen when
// its connection is first created.
//
// The node is picked from the slot of the first channel (a read-only node
// when ReadOnly is set, otherwise the slot's master), or at random when no
// channels are given. The chosen node is remembered in a variable shared by
// the newConn and closeConn closures and is reset to nil when the connection
// is closed or could not be established.
//
// maintenance notifications won't work here for now
func (c *ClusterClient) pubSub() *PubSub {
	// node is shared by the two closures below.
	var node *clusterNode
	pubsub := &PubSub{
		opt: c.opt.clientOptions(),
		newConn: func(ctx context.Context, addr string, channels []string) (*pool.Conn, error) {
			// A node is still set from an earlier newConn call that was not
			// followed by closeConn; that is treated as a programming error.
			if node != nil {
				panic("node != nil")
			}
			var err error
			if len(channels) > 0 {
				slot := hashtag.Slot(channels[0])
				// newConn in PubSub is only used for subscription connections, so it is safe to
				// assume that a slave node can always be used when client options specify ReadOnly.
				if c.opt.ReadOnly {
					// Note: this err shadows the outer one within the ReadOnly branch;
					// node is still the shared variable.
					state, err := c.state.Get(ctx)
					if err != nil {
						return nil, err
					}
					node, err = c.slotReadOnlyNode(state, slot)
					if err != nil {
						return nil, err
					}
				} else {
					node, err = c.slotMasterNode(ctx, slot)
					if err != nil {
						return nil, err
					}
				}
			} else {
				node, err = c.nodes.Random()
				if err != nil {
					return nil, err
				}
			}
			cn, err := node.Client.pubSubPool.NewConn(ctx, node.Client.opt.Network, node.Client.opt.Addr, channels)
			if err != nil {
				// Forget the node so a later newConn call can pick again.
				node = nil
				return nil, err
			}
			// will return nil if already initialized
			err = node.Client.initConn(ctx, cn)
			if err != nil {
				_ = cn.Close()
				node = nil
				return nil, err
			}
			node.Client.pubSubPool.TrackConn(cn)
			return cn, nil
		},
		closeConn: func(cn *pool.Conn) error {
			// Untrack connection from PubSubPool
			// NOTE(review): node is dereferenced without a nil check - presumably
			// closeConn is only called after a successful newConn; confirm.
			node.Client.pubSubPool.UntrackConn(cn)
			err := cn.Close()
			node = nil
			return err
		},
	}
	pubsub.init()
	return pubsub
}
  1644. // Subscribe subscribes the client to the specified channels.
  1645. // Channels can be omitted to create empty subscription.
  1646. func (c *ClusterClient) Subscribe(ctx context.Context, channels ...string) *PubSub {
  1647. pubsub := c.pubSub()
  1648. if len(channels) > 0 {
  1649. _ = pubsub.Subscribe(ctx, channels...)
  1650. }
  1651. return pubsub
  1652. }
  1653. // PSubscribe subscribes the client to the given patterns.
  1654. // Patterns can be omitted to create empty subscription.
  1655. func (c *ClusterClient) PSubscribe(ctx context.Context, channels ...string) *PubSub {
  1656. pubsub := c.pubSub()
  1657. if len(channels) > 0 {
  1658. _ = pubsub.PSubscribe(ctx, channels...)
  1659. }
  1660. return pubsub
  1661. }
  1662. // SSubscribe Subscribes the client to the specified shard channels.
  1663. func (c *ClusterClient) SSubscribe(ctx context.Context, channels ...string) *PubSub {
  1664. pubsub := c.pubSub()
  1665. if len(channels) > 0 {
  1666. _ = pubsub.SSubscribe(ctx, channels...)
  1667. }
  1668. return pubsub
  1669. }
  1670. func (c *ClusterClient) retryBackoff(attempt int) time.Duration {
  1671. return internal.RetryBackoff(attempt, c.opt.MinRetryBackoff, c.opt.MaxRetryBackoff)
  1672. }
  1673. func (c *ClusterClient) cmdsInfo(ctx context.Context) (map[string]*CommandInfo, error) {
  1674. // Try 3 random nodes.
  1675. const nodeLimit = 3
  1676. addrs, err := c.nodes.Addrs()
  1677. if err != nil {
  1678. return nil, err
  1679. }
  1680. var firstErr error
  1681. perm := rand.Perm(len(addrs))
  1682. if len(perm) > nodeLimit {
  1683. perm = perm[:nodeLimit]
  1684. }
  1685. for _, idx := range perm {
  1686. addr := addrs[idx]
  1687. node, err := c.nodes.GetOrCreate(addr)
  1688. if err != nil {
  1689. if firstErr == nil {
  1690. firstErr = err
  1691. }
  1692. continue
  1693. }
  1694. info, err := node.Client.Command(ctx).Result()
  1695. if err == nil {
  1696. return info, nil
  1697. }
  1698. if firstErr == nil {
  1699. firstErr = err
  1700. }
  1701. }
  1702. if firstErr == nil {
  1703. panic("not reached")
  1704. }
  1705. return nil, firstErr
  1706. }
  1707. func (c *ClusterClient) cmdInfo(ctx context.Context, name string) *CommandInfo {
  1708. cmdsInfo, err := c.cmdsInfoCache.Get(ctx)
  1709. if err != nil {
  1710. internal.Logger.Printf(context.TODO(), "getting command info: %s", err)
  1711. return nil
  1712. }
  1713. info := cmdsInfo[name]
  1714. if info == nil {
  1715. internal.Logger.Printf(context.TODO(), "info for cmd=%s not found", name)
  1716. }
  1717. return info
  1718. }
  1719. func (c *ClusterClient) cmdSlot(cmd Cmder, preferredRandomSlot int) int {
  1720. args := cmd.Args()
  1721. if args[0] == "cluster" && (args[1] == "getkeysinslot" || args[1] == "countkeysinslot") {
  1722. return args[2].(int)
  1723. }
  1724. return cmdSlot(cmd, cmdFirstKeyPos(cmd), preferredRandomSlot)
  1725. }
  1726. func cmdSlot(cmd Cmder, pos int, preferredRandomSlot int) int {
  1727. if pos == 0 {
  1728. if preferredRandomSlot != -1 {
  1729. return preferredRandomSlot
  1730. }
  1731. return hashtag.RandomSlot()
  1732. }
  1733. firstKey := cmd.stringArg(pos)
  1734. return hashtag.Slot(firstKey)
  1735. }
  1736. func (c *ClusterClient) cmdNode(
  1737. ctx context.Context,
  1738. cmdName string,
  1739. slot int,
  1740. ) (*clusterNode, error) {
  1741. state, err := c.state.Get(ctx)
  1742. if err != nil {
  1743. return nil, err
  1744. }
  1745. if c.opt.ReadOnly {
  1746. cmdInfo := c.cmdInfo(ctx, cmdName)
  1747. if cmdInfo != nil && cmdInfo.ReadOnly {
  1748. return c.slotReadOnlyNode(state, slot)
  1749. }
  1750. }
  1751. return state.slotMasterNode(slot)
  1752. }
  1753. func (c *ClusterClient) slotReadOnlyNode(state *clusterState, slot int) (*clusterNode, error) {
  1754. if c.opt.RouteByLatency {
  1755. return state.slotClosestNode(slot)
  1756. }
  1757. if c.opt.RouteRandomly {
  1758. return state.slotRandomNode(slot)
  1759. }
  1760. return state.slotSlaveNode(slot)
  1761. }
  1762. func (c *ClusterClient) slotMasterNode(ctx context.Context, slot int) (*clusterNode, error) {
  1763. state, err := c.state.Get(ctx)
  1764. if err != nil {
  1765. return nil, err
  1766. }
  1767. return state.slotMasterNode(slot)
  1768. }
  1769. // SlaveForKey gets a client for a replica node to run any command on it.
  1770. // This is especially useful if we want to run a particular lua script which has
  1771. // only read only commands on the replica.
  1772. // This is because other redis commands generally have a flag that points that
  1773. // they are read only and automatically run on the replica nodes
  1774. // if ClusterOptions.ReadOnly flag is set to true.
  1775. func (c *ClusterClient) SlaveForKey(ctx context.Context, key string) (*Client, error) {
  1776. state, err := c.state.Get(ctx)
  1777. if err != nil {
  1778. return nil, err
  1779. }
  1780. slot := hashtag.Slot(key)
  1781. node, err := c.slotReadOnlyNode(state, slot)
  1782. if err != nil {
  1783. return nil, err
  1784. }
  1785. return node.Client, err
  1786. }
  1787. // MasterForKey return a client to the master node for a particular key.
  1788. func (c *ClusterClient) MasterForKey(ctx context.Context, key string) (*Client, error) {
  1789. slot := hashtag.Slot(key)
  1790. node, err := c.slotMasterNode(ctx, slot)
  1791. if err != nil {
  1792. return nil, err
  1793. }
  1794. return node.Client, nil
  1795. }
  1796. func (c *ClusterClient) context(ctx context.Context) context.Context {
  1797. if c.opt.ContextTimeoutEnabled {
  1798. return ctx
  1799. }
  1800. return context.Background()
  1801. }
  1802. func appendIfNotExist[T comparable](vals []T, newVal T) []T {
  1803. for _, v := range vals {
  1804. if v == newVal {
  1805. return vals
  1806. }
  1807. }
  1808. return append(vals, newVal)
  1809. }
  1810. //------------------------------------------------------------------------------
// cmdsMap groups commands by the cluster node they are destined for.
type cmdsMap struct {
	// mu is held by Add while it updates m.
	mu sync.Mutex
	// m maps a node to the commands queued for it.
	m map[*clusterNode][]Cmder
}
  1815. func newCmdsMap() *cmdsMap {
  1816. return &cmdsMap{
  1817. m: make(map[*clusterNode][]Cmder),
  1818. }
  1819. }
  1820. func (m *cmdsMap) Add(node *clusterNode, cmds ...Cmder) {
  1821. m.mu.Lock()
  1822. m.m[node] = append(m.m[node], cmds...)
  1823. m.mu.Unlock()
  1824. }