25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

459 satır
15 KiB

  1. package maintnotifications
  2. import (
  3. "context"
  4. "net"
  5. "runtime"
  6. "strings"
  7. "time"
  8. "github.com/redis/go-redis/v9/internal"
  9. "github.com/redis/go-redis/v9/internal/maintnotifications/logs"
  10. "github.com/redis/go-redis/v9/internal/util"
  11. )
  // Mode selects how the client negotiates maintenance push notifications
  // with the server.
  type Mode string

  // Supported maintenance notification modes.
  const (
  	// ModeDisabled: the client does not send the CLIENT MAINT_NOTIFICATIONS ON command.
  	ModeDisabled Mode = "disabled"
  	// ModeEnabled: the client forcefully sends the command and interrupts the connection on error.
  	ModeEnabled Mode = "enabled"
  	// ModeAuto: the client tries to send the command and disables the feature on error.
  	ModeAuto Mode = "auto"
  )

  // IsValid reports whether m is one of the declared Mode constants.
  // The empty string and any other value are rejected.
  func (m Mode) IsValid() bool {
  	return m == ModeDisabled || m == ModeEnabled || m == ModeAuto
  }

  // String returns the raw string value of the mode, implementing fmt.Stringer.
  func (m Mode) String() string {
  	text := string(m)
  	return text
  }
  // EndpointType names the kind of endpoint the client asks the server to
  // report in MOVING notifications.
  type EndpointType string

  // Supported endpoint types.
  const (
  	// EndpointTypeAuto: auto-detect based on the connection.
  	EndpointTypeAuto EndpointType = "auto"
  	// EndpointTypeInternalIP: internal IP address.
  	EndpointTypeInternalIP EndpointType = "internal-ip"
  	// EndpointTypeInternalFQDN: internal fully qualified domain name.
  	EndpointTypeInternalFQDN EndpointType = "internal-fqdn"
  	// EndpointTypeExternalIP: external IP address.
  	EndpointTypeExternalIP EndpointType = "external-ip"
  	// EndpointTypeExternalFQDN: external fully qualified domain name.
  	EndpointTypeExternalFQDN EndpointType = "external-fqdn"
  	// EndpointTypeNone: no endpoint (reconnect with the current config).
  	EndpointTypeNone EndpointType = "none"
  )

  // IsValid reports whether e is one of the declared EndpointType constants.
  // The empty string and any other value are rejected.
  func (e EndpointType) IsValid() bool {
  	return e == EndpointTypeAuto ||
  		e == EndpointTypeInternalIP ||
  		e == EndpointTypeInternalFQDN ||
  		e == EndpointTypeExternalIP ||
  		e == EndpointTypeExternalFQDN ||
  		e == EndpointTypeNone
  }

  // String returns the raw string value of the endpoint type, implementing fmt.Stringer.
  func (e EndpointType) String() string {
  	text := string(e)
  	return text
  }
  // Config provides configuration options for maintenance notifications.
  //
  // Zero-valued fields are treated as "not set" by the ApplyDefaults* methods,
  // which return a new Config with the defaults described below filled in.
  type Config struct {
  	// Mode controls how client maintenance notifications are handled.
  	// Valid values: ModeDisabled, ModeEnabled, ModeAuto
  	// Default: ModeAuto
  	Mode Mode
  	// EndpointType specifies the type of endpoint to request in MOVING notifications.
  	// Valid values: EndpointTypeAuto, EndpointTypeInternalIP, EndpointTypeInternalFQDN,
  	// EndpointTypeExternalIP, EndpointTypeExternalFQDN, EndpointTypeNone
  	// Default: EndpointTypeAuto
  	EndpointType EndpointType
  	// RelaxedTimeout is the concrete timeout value to use during
  	// MIGRATING/FAILING_OVER states to accommodate increased latency.
  	// This applies to both read and write timeouts.
  	// Must be positive to pass Validate.
  	// Default: 10 seconds
  	RelaxedTimeout time.Duration
  	// HandoffTimeout is the maximum time to wait for connection handoff to complete.
  	// If handoff takes longer than this, the old connection will be forcibly closed.
  	// Must be positive to pass Validate.
  	// Default: 15 seconds (matches server-side eviction timeout)
  	HandoffTimeout time.Duration
  	// MaxWorkers is the maximum number of worker goroutines for processing handoff requests.
  	// Workers are created on-demand and automatically cleaned up when idle.
  	// If zero, defaults to min(PoolSize/2, max(10, PoolSize/3)).
  	// If explicitly set, a minimum of PoolSize/2 is enforced.
  	// In both cases the result is at least 1, and a non-positive pool size is
  	// replaced by 10 * GOMAXPROCS for this calculation.
  	// Negative values are rejected by Validate.
  	//
  	// Default: min(PoolSize/2, max(10, PoolSize/3)), Minimum when set: PoolSize/2
  	MaxWorkers int
  	// HandoffQueueSize is the size of the buffered channel used to queue handoff requests.
  	// If the queue is full, new handoff requests will be rejected.
  	// Scales with both worker count and pool size for better burst handling.
  	// After capping, a floor of 2 is always applied.
  	// Negative values are rejected by Validate.
  	//
  	// Default: max(20×MaxWorkers, PoolSize), capped by MaxActiveConns+1 (if set) or 5×PoolSize
  	// When set: minimum 200, capped by MaxActiveConns+1 (if set) or 5×PoolSize
  	HandoffQueueSize int
  	// PostHandoffRelaxedDuration is how long to keep relaxed timeouts on the new connection
  	// after a handoff completes. This provides additional resilience during cluster transitions.
  	// Negative values are rejected by Validate.
  	// Default: 2 * RelaxedTimeout
  	PostHandoffRelaxedDuration time.Duration
  	// Circuit breaker configuration for endpoint failure handling.

  	// CircuitBreakerFailureThreshold is the number of failures before opening the circuit.
  	// Must be at least 1 to pass Validate.
  	// Default: 5
  	CircuitBreakerFailureThreshold int
  	// CircuitBreakerResetTimeout is how long to wait before testing if the endpoint recovered.
  	// Negative values are rejected by Validate.
  	// Default: 60 seconds
  	CircuitBreakerResetTimeout time.Duration
  	// CircuitBreakerMaxRequests is the maximum number of requests allowed in half-open state.
  	// Must be at least 1 to pass Validate.
  	// Default: 3
  	CircuitBreakerMaxRequests int
  	// MaxHandoffRetries is the maximum number of times to retry a failed handoff.
  	// After this many retries, the connection will be removed from the pool.
  	// Validate accepts values in the range 1..10.
  	// Default: 3
  	MaxHandoffRetries int
  }
  111. func (c *Config) IsEnabled() bool {
  112. return c != nil && c.Mode != ModeDisabled
  113. }
  114. // DefaultConfig returns a Config with sensible defaults.
  115. func DefaultConfig() *Config {
  116. return &Config{
  117. Mode: ModeAuto, // Enable by default for Redis Cloud
  118. EndpointType: EndpointTypeAuto, // Auto-detect based on connection
  119. RelaxedTimeout: 10 * time.Second,
  120. HandoffTimeout: 15 * time.Second,
  121. MaxWorkers: 0, // Auto-calculated based on pool size
  122. HandoffQueueSize: 0, // Auto-calculated based on max workers
  123. PostHandoffRelaxedDuration: 0, // Auto-calculated based on relaxed timeout
  124. // Circuit breaker configuration
  125. CircuitBreakerFailureThreshold: 5,
  126. CircuitBreakerResetTimeout: 60 * time.Second,
  127. CircuitBreakerMaxRequests: 3,
  128. // Connection Handoff Configuration
  129. MaxHandoffRetries: 3,
  130. }
  131. }
  132. // Validate checks if the configuration is valid.
  133. func (c *Config) Validate() error {
  134. if c.RelaxedTimeout <= 0 {
  135. return ErrInvalidRelaxedTimeout
  136. }
  137. if c.HandoffTimeout <= 0 {
  138. return ErrInvalidHandoffTimeout
  139. }
  140. // Validate worker configuration
  141. // Allow 0 for auto-calculation, but negative values are invalid
  142. if c.MaxWorkers < 0 {
  143. return ErrInvalidHandoffWorkers
  144. }
  145. // HandoffQueueSize validation - allow 0 for auto-calculation
  146. if c.HandoffQueueSize < 0 {
  147. return ErrInvalidHandoffQueueSize
  148. }
  149. if c.PostHandoffRelaxedDuration < 0 {
  150. return ErrInvalidPostHandoffRelaxedDuration
  151. }
  152. // Circuit breaker validation
  153. if c.CircuitBreakerFailureThreshold < 1 {
  154. return ErrInvalidCircuitBreakerFailureThreshold
  155. }
  156. if c.CircuitBreakerResetTimeout < 0 {
  157. return ErrInvalidCircuitBreakerResetTimeout
  158. }
  159. if c.CircuitBreakerMaxRequests < 1 {
  160. return ErrInvalidCircuitBreakerMaxRequests
  161. }
  162. // Validate Mode (maintenance notifications mode)
  163. if !c.Mode.IsValid() {
  164. return ErrInvalidMaintNotifications
  165. }
  166. // Validate EndpointType
  167. if !c.EndpointType.IsValid() {
  168. return ErrInvalidEndpointType
  169. }
  170. // Validate configuration fields
  171. if c.MaxHandoffRetries < 1 || c.MaxHandoffRetries > 10 {
  172. return ErrInvalidHandoffRetries
  173. }
  174. return nil
  175. }
  176. // ApplyDefaults applies default values to any zero-value fields in the configuration.
  177. // This ensures that partially configured structs get sensible defaults for missing fields.
  178. func (c *Config) ApplyDefaults() *Config {
  179. return c.ApplyDefaultsWithPoolSize(0)
  180. }
  181. // ApplyDefaultsWithPoolSize applies default values to any zero-value fields in the configuration,
  182. // using the provided pool size to calculate worker defaults.
  183. // This ensures that partially configured structs get sensible defaults for missing fields.
  184. func (c *Config) ApplyDefaultsWithPoolSize(poolSize int) *Config {
  185. return c.ApplyDefaultsWithPoolConfig(poolSize, 0)
  186. }
  187. // ApplyDefaultsWithPoolConfig applies default values to any zero-value fields in the configuration,
  188. // using the provided pool size and max active connections to calculate worker and queue defaults.
  189. // This ensures that partially configured structs get sensible defaults for missing fields.
  190. func (c *Config) ApplyDefaultsWithPoolConfig(poolSize int, maxActiveConns int) *Config {
  191. if c == nil {
  192. return DefaultConfig().ApplyDefaultsWithPoolSize(poolSize)
  193. }
  194. defaults := DefaultConfig()
  195. result := &Config{}
  196. // Apply defaults for enum fields (empty/zero means not set)
  197. result.Mode = defaults.Mode
  198. if c.Mode != "" {
  199. result.Mode = c.Mode
  200. }
  201. result.EndpointType = defaults.EndpointType
  202. if c.EndpointType != "" {
  203. result.EndpointType = c.EndpointType
  204. }
  205. // Apply defaults for duration fields (zero means not set)
  206. result.RelaxedTimeout = defaults.RelaxedTimeout
  207. if c.RelaxedTimeout > 0 {
  208. result.RelaxedTimeout = c.RelaxedTimeout
  209. }
  210. result.HandoffTimeout = defaults.HandoffTimeout
  211. if c.HandoffTimeout > 0 {
  212. result.HandoffTimeout = c.HandoffTimeout
  213. }
  214. // Copy worker configuration
  215. result.MaxWorkers = c.MaxWorkers
  216. // Apply worker defaults based on pool size
  217. result.applyWorkerDefaults(poolSize)
  218. // Apply queue size defaults with new scaling approach
  219. // Default: max(20x workers, PoolSize), capped by maxActiveConns or 5x pool size
  220. workerBasedSize := result.MaxWorkers * 20
  221. poolBasedSize := poolSize
  222. result.HandoffQueueSize = util.Max(workerBasedSize, poolBasedSize)
  223. if c.HandoffQueueSize > 0 {
  224. // When explicitly set: enforce minimum of 200
  225. result.HandoffQueueSize = util.Max(200, c.HandoffQueueSize)
  226. }
  227. // Cap queue size: use maxActiveConns+1 if set, otherwise 5x pool size
  228. var queueCap int
  229. if maxActiveConns > 0 {
  230. queueCap = maxActiveConns + 1
  231. // Ensure queue cap is at least 2 for very small maxActiveConns
  232. if queueCap < 2 {
  233. queueCap = 2
  234. }
  235. } else {
  236. queueCap = poolSize * 5
  237. }
  238. result.HandoffQueueSize = util.Min(result.HandoffQueueSize, queueCap)
  239. // Ensure minimum queue size of 2 (fallback for very small pools)
  240. if result.HandoffQueueSize < 2 {
  241. result.HandoffQueueSize = 2
  242. }
  243. result.PostHandoffRelaxedDuration = result.RelaxedTimeout * 2
  244. if c.PostHandoffRelaxedDuration > 0 {
  245. result.PostHandoffRelaxedDuration = c.PostHandoffRelaxedDuration
  246. }
  247. // Apply defaults for configuration fields
  248. result.MaxHandoffRetries = defaults.MaxHandoffRetries
  249. if c.MaxHandoffRetries > 0 {
  250. result.MaxHandoffRetries = c.MaxHandoffRetries
  251. }
  252. // Circuit breaker configuration
  253. result.CircuitBreakerFailureThreshold = defaults.CircuitBreakerFailureThreshold
  254. if c.CircuitBreakerFailureThreshold > 0 {
  255. result.CircuitBreakerFailureThreshold = c.CircuitBreakerFailureThreshold
  256. }
  257. result.CircuitBreakerResetTimeout = defaults.CircuitBreakerResetTimeout
  258. if c.CircuitBreakerResetTimeout > 0 {
  259. result.CircuitBreakerResetTimeout = c.CircuitBreakerResetTimeout
  260. }
  261. result.CircuitBreakerMaxRequests = defaults.CircuitBreakerMaxRequests
  262. if c.CircuitBreakerMaxRequests > 0 {
  263. result.CircuitBreakerMaxRequests = c.CircuitBreakerMaxRequests
  264. }
  265. if internal.LogLevel.DebugOrAbove() {
  266. internal.Logger.Printf(context.Background(), logs.DebugLoggingEnabled())
  267. internal.Logger.Printf(context.Background(), logs.ConfigDebug(result))
  268. }
  269. return result
  270. }
  271. // Clone creates a deep copy of the configuration.
  272. func (c *Config) Clone() *Config {
  273. if c == nil {
  274. return DefaultConfig()
  275. }
  276. return &Config{
  277. Mode: c.Mode,
  278. EndpointType: c.EndpointType,
  279. RelaxedTimeout: c.RelaxedTimeout,
  280. HandoffTimeout: c.HandoffTimeout,
  281. MaxWorkers: c.MaxWorkers,
  282. HandoffQueueSize: c.HandoffQueueSize,
  283. PostHandoffRelaxedDuration: c.PostHandoffRelaxedDuration,
  284. // Circuit breaker configuration
  285. CircuitBreakerFailureThreshold: c.CircuitBreakerFailureThreshold,
  286. CircuitBreakerResetTimeout: c.CircuitBreakerResetTimeout,
  287. CircuitBreakerMaxRequests: c.CircuitBreakerMaxRequests,
  288. // Configuration fields
  289. MaxHandoffRetries: c.MaxHandoffRetries,
  290. }
  291. }
  292. // applyWorkerDefaults calculates and applies worker defaults based on pool size
  293. func (c *Config) applyWorkerDefaults(poolSize int) {
  294. // Calculate defaults based on pool size
  295. if poolSize <= 0 {
  296. poolSize = 10 * runtime.GOMAXPROCS(0)
  297. }
  298. // When not set: min(poolSize/2, max(10, poolSize/3)) - balanced scaling approach
  299. originalMaxWorkers := c.MaxWorkers
  300. c.MaxWorkers = util.Min(poolSize/2, util.Max(10, poolSize/3))
  301. if originalMaxWorkers != 0 {
  302. // When explicitly set: max(poolSize/2, set_value) - ensure at least poolSize/2 workers
  303. c.MaxWorkers = util.Max(poolSize/2, originalMaxWorkers)
  304. }
  305. // Ensure minimum of 1 worker (fallback for very small pools)
  306. if c.MaxWorkers < 1 {
  307. c.MaxWorkers = 1
  308. }
  309. }
  310. // DetectEndpointType automatically detects the appropriate endpoint type
  311. // based on the connection address and TLS configuration.
  312. //
  313. // For IP addresses:
  314. // - If TLS is enabled: requests FQDN for proper certificate validation
  315. // - If TLS is disabled: requests IP for better performance
  316. //
  317. // For hostnames:
  318. // - If TLS is enabled: always requests FQDN for proper certificate validation
  319. // - If TLS is disabled: requests IP for better performance
  320. //
  321. // Internal vs External detection:
  322. // - For IPs: uses private IP range detection
  323. // - For hostnames: uses heuristics based on common internal naming patterns
  324. func DetectEndpointType(addr string, tlsEnabled bool) EndpointType {
  325. // Extract host from "host:port" format
  326. host, _, err := net.SplitHostPort(addr)
  327. if err != nil {
  328. host = addr // Assume no port
  329. }
  330. // Check if the host is an IP address or hostname
  331. ip := net.ParseIP(host)
  332. isIPAddress := ip != nil
  333. var endpointType EndpointType
  334. if isIPAddress {
  335. // Address is an IP - determine if it's private or public
  336. isPrivate := ip.IsPrivate() || ip.IsLoopback() || ip.IsLinkLocalUnicast()
  337. if tlsEnabled {
  338. // TLS with IP addresses - still prefer FQDN for certificate validation
  339. if isPrivate {
  340. endpointType = EndpointTypeInternalFQDN
  341. } else {
  342. endpointType = EndpointTypeExternalFQDN
  343. }
  344. } else {
  345. // No TLS - can use IP addresses directly
  346. if isPrivate {
  347. endpointType = EndpointTypeInternalIP
  348. } else {
  349. endpointType = EndpointTypeExternalIP
  350. }
  351. }
  352. } else {
  353. // Address is a hostname
  354. isInternalHostname := isInternalHostname(host)
  355. if isInternalHostname {
  356. endpointType = EndpointTypeInternalFQDN
  357. } else {
  358. endpointType = EndpointTypeExternalFQDN
  359. }
  360. }
  361. return endpointType
  362. }
  // isInternalHostname determines if a hostname appears to be internal/private.
  // This is a heuristic based on common naming patterns, not a guarantee.
  //
  // The comparison is case-insensitive and ignores a single trailing root dot
  // (so "cache.internal." is treated like "cache.internal"). A hostname is
  // considered internal when:
  //   - it has no dots at all (e.g. "localhost", "redis-1", "db-server"), or
  //   - it ends in one of the well-known internal suffixes listed below.
  //
  // Everything else is assumed to be an external fully qualified domain name.
  func isInternalHostname(hostname string) bool {
  	// Normalise case and drop the root dot of an absolute FQDN so that
  	// "localhost." and "db.corp." are classified like their relative forms.
  	hostname = strings.TrimSuffix(strings.ToLower(hostname), ".")

  	// Single-label names are almost always resolved via local search
  	// domains or hosts files, so treat them as internal.
  	if !strings.Contains(hostname, ".") {
  		return true
  	}

  	// Common internal domain suffixes. Each starts with a dot so matches
  	// happen on a label boundary ("app.localhost" matches, "db.notlocalhost"
  	// does not).
  	internalSuffixes := [...]string{
  		".localhost",
  		".local",
  		".internal",
  		".corp",
  		".lan",
  		".intranet",
  		".private",
  	}
  	for _, suffix := range internalSuffixes {
  		if strings.HasSuffix(hostname, suffix) {
  			return true
  		}
  	}

  	// Default to external for fully qualified domain names.
  	return false
  }