After a certain load of GEOADD & BRPOP, Redis/Docker responds with errors

I am using go-redis to connect to a Redis server running in Docker Desktop, while my Go app runs directly on my Mac.

This is my client setup:

package redis

import (
    "fmt"
    "os"

    "github.com/go-redis/redis/v8"
)

var redisClient *RedisClient

type RedisClient struct {
    *redis.Client
}

func GetRedisClient() *RedisClient {
    if redisClient != nil {
        return redisClient
    }
    host := os.Getenv("REDIS_HOST")
    port := os.Getenv("REDIS_PORT")
    password := os.Getenv("REDIS_PASS")

    client := redis.NewClient(&redis.Options{
        Addr:     fmt.Sprintf("%s:%s", host, port),
        Password: password, // empty string when no password is set
        DB:       0,
    })

    redisClient = &RedisClient{
        Client: client,
    }

    return redisClient

}
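The same Options struct also controls the client's connection pool. As a hedged sketch only (these are go-redis v8's documented PoolSize, MinIdleConns, and PoolTimeout fields, but the numbers are placeholder assumptions, and a time import is needed), the pool can be bounded so the client cannot open sockets without limit. Note that every blocking BRPOP holds one pooled connection for its full duration, so PoolSize must cover the number of concurrently blocked readers:

client := redis.NewClient(&redis.Options{
    Addr:     fmt.Sprintf("%s:%s", host, port),
    Password: password,
    DB:       0,
    // Placeholder tuning values; adjust to the workload.
    PoolSize:     100,             // cap on connections (and file descriptors) per client
    MinIdleConns: 10,              // keep some connections warm for the 1s GEOADD ticks
    PoolTimeout:  5 * time.Second, // how long to wait for a free connection before erroring
})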

My Docker Compose file:

version: "3.8"

services:

    redis:
        container_name: redis
        image: redis:6.2
        ports:
            - "6379:6379"
        ulimits:
          nofile:
            soft: 65536
            hard: 65536

The app exposes websocket connections to drivers, which report their current location every second; each report is saved in Redis using GEOADD. The app also exposes a second set of websocket connections to the same drivers for general notifications, consumed with BRPOP. After about 70 drivers are connected, the extra drivers trying to connect get errors. The errors come from the function that saves the location to Redis:

dial tcp [::1]:6379: socket: too many open files

and sometimes:

dial tcp: lookup localhost: no such host

func (r *RedisClient) SetPoint(ctx context.Context, item *Identifier, loc *Location) error {
    geoLocation := &redis.GeoLocation{Name: item.Id, Latitude: loc.Lat, Longitude: loc.Lng}
    if err := r.GeoAdd(ctx, item.key(), geoLocation).Err(); err != nil {
        fmt.Println("error adding geo", err)
        return errors.New("failed to set point")
    }
    return nil
}
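For illustration, a hedged usage sketch of SetPoint with a short per-call deadline, so that a stalled dial fails fast instead of holding its socket open (the helper name, the 2-second value, and the literal arguments are assumptions):

func saveDriverLocation(rc *RedisClient, id string, lat, lng float64) {
    // Bound each write; without a deadline a stuck dial keeps its file descriptor.
    ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
    defer cancel()
    if err := rc.SetPoint(ctx, &Identifier{Id: id}, &Location{Lat: lat, Lng: lng}); err != nil {
        fmt.Println("save location:", err)
    }
}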

For general notifications, the timeout on the pull is zero, meaning BRPOP blocks indefinitely (a sketch of the assumed DriverPullStatus follows the handler below):

type DriverData struct {
    Status   OrderStatusType `json:"status,omitempty"`
    DriverId uint            `json:"driver_id,omitempty"`
    UserId   uint            `json:"user_id,omitempty"`
}

func (config *Config) DriverOrderStatus(c *gin.Context) {
    driverID := utils.ToUint(auth.GetToken(c).Subject)
    ctx := c.Request.Context()

    // order := models.GetOrder(config.Db)
    // var _ = order.GetActiveOrderForUser(driverID)

    wsconn, err := websocket.Accept(c.Writer, c.Request, &websocket.AcceptOptions{InsecureSkipVerify: true})
    if err != nil {
        return
    }

    // if order.ID != 0 {
    //  var _ = wsjson.Write(ctx, wsconn, &UserData{Order: order, Status: order.Status, Driver: order.Driver})
    // } else {
    //  var _ = wsjson.Write(ctx, wsconn, &UserData{ResetOrder: true})
    // }

    defer wsconn.Close(websocket.StatusInternalError, "")

    closeRead := wsconn.CloseRead(ctx)

    driverDataCh := make(chan *DriverData, 1000)

    go func() {
    loop:
        for {
            select {
            case <-closeRead.Done():
                break loop
            default:
                if status, err := config.Redis.DriverPullStatus(ctx, driverID); err == nil {
                    driverDataCh <- &DriverData{Status: status.Status, DriverId: status.DriverID, UserId: status.UserID}
                }
            }
        }
        fmt.Println("redis pulling data is over")
    }()

loop:
    for {
        select {
        case <-closeRead.Done():
            break loop
        case driverData := <-driverDataCh:
            if err := wsjson.Write(ctx, wsconn, driverData); err != nil {
                break loop
            }
        }
    }

    fmt.Println("sending updates to user is over")

}
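DriverPullStatus is not shown in the question; presumably it wraps a blocking BRPOP with a zero timeout. A minimal sketch of what it might look like, assuming a driver:<id>:status key scheme, a JSON-encoded payload, and an encoding/json import (all assumptions):

// DriverStatus is a hypothetical shape for the queued notification payload.
type DriverStatus struct {
    Status   OrderStatusType `json:"status"`
    DriverID uint            `json:"driver_id"`
    UserID   uint            `json:"user_id"`
}

// DriverPullStatus blocks on BRPOP until a notification arrives for the
// driver. A zero timeout blocks forever, which also ties up one pooled
// connection (and one socket) per waiting driver.
func (r *RedisClient) DriverPullStatus(ctx context.Context, driverID uint) (*DriverStatus, error) {
    key := fmt.Sprintf("driver:%d:status", driverID) // assumed key scheme
    res, err := r.BRPop(ctx, 0, key).Result()        // res[0] is the key, res[1] the value
    if err != nil {
        return nil, err
    }
    var status DriverStatus
    if err := json.Unmarshal([]byte(res[1]), &status); err != nil {
        return nil, err
    }
    return &status, nil
}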

This is the Redis server INFO output:

127.0.0.1:6379> info
# Server
redis_version:6.2.6
redis_git_sha1:00000000
redis_git_dirty:0
redis_build_id:a0adc3471b8cfa72
redis_mode:standalone
os:Linux 5.10.47-linuxkit x86_64
arch_bits:64
multiplexing_api:epoll
atomicvar_api:atomic-builtin
gcc_version:10.3.1
process_id:1
process_supervised:no
run_id:d92005e2ccb89ea8e3be57e3bb1b79e0e323c2a7
tcp_port:6379
server_time_usec:1654937072463352
uptime_in_seconds:325287
uptime_in_days:3
hz:10
configured_hz:10
lru_clock:10769904
executable:/data/redis-server
config_file:
io_threads_active:0

# Clients
connected_clients:104
cluster_connections:0
maxclients:10000
client_recent_max_input_buffer:48
client_recent_max_output_buffer:0
blocked_clients:81
tracking_clients:0
clients_in_timeout_table:0

# Memory
used_memory:3081168
used_memory_human:2.94M
used_memory_rss:5791744
used_memory_rss_human:5.52M
used_memory_peak:5895528
used_memory_peak_human:5.62M
used_memory_peak_perc:52.26%
used_memory_overhead:2944804
used_memory_startup:809880
used_memory_dataset:136364
used_memory_dataset_perc:6.00%
allocator_allocated:3166992
allocator_active:3862528
allocator_resident:6742016
total_system_memory:4125036544
total_system_memory_human:3.84G
used_memory_lua:37888
used_memory_lua_human:37.00K
used_memory_scripts:0
used_memory_scripts_human:0B
number_of_cached_scripts:0
maxmemory:0
maxmemory_human:0B
maxmemory_policy:noeviction
allocator_frag_ratio:1.22
allocator_frag_bytes:695536
allocator_rss_ratio:1.75
allocator_rss_bytes:2879488
rss_overhead_ratio:0.86
rss_overhead_bytes:-950272
mem_fragmentation_ratio:1.88
mem_fragmentation_bytes:2712392
mem_not_counted_for_evict:0
mem_replication_backlog:0
mem_clients_slaves:0
mem_clients_normal:2134588
mem_aof_buffer:0
mem_allocator:jemalloc-5.1.0
active_defrag_running:0
lazyfree_pending_objects:0
lazyfreed_objects:0

# Persistence
loading:0
current_cow_size:0
current_cow_size_age:0
current_fork_perc:0.00
current_save_keys_processed:0
current_save_keys_total:0
rdb_changes_since_last_save:3636
rdb_bgsave_in_progress:0
rdb_last_save_time:1654936992
rdb_last_bgsave_status:ok
rdb_last_bgsave_time_sec:1
rdb_current_bgsave_time_sec:-1
rdb_last_cow_size:450560
aof_enabled:0
aof_rewrite_in_progress:0
aof_rewrite_scheduled:0
aof_last_rewrite_time_sec:-1
aof_current_rewrite_time_sec:-1
aof_last_bgrewrite_status:ok
aof_last_write_status:ok
aof_last_cow_size:0
module_fork_in_progress:0
module_fork_last_cow_size:0

# Stats
total_connections_received:1271
total_commands_processed:296750
instantaneous_ops_per_sec:45
total_net_input_bytes:27751095
total_net_output_bytes:1254190
instantaneous_input_kbps:4.16
instantaneous_output_kbps:12.46
rejected_connections:0
sync_full:0
sync_partial_ok:0
sync_partial_err:0
expired_keys:0
expired_stale_perc:0.00
expired_time_cap_reached_count:0
expire_cycle_cpu_milliseconds:18136
evicted_keys:0
keyspace_hits:10
keyspace_misses:3567
pubsub_channels:0
pubsub_patterns:0
latest_fork_usec:6453
total_forks:41
migrate_cached_sockets:0
slave_expires_tracked_keys:0
active_defrag_hits:0
active_defrag_misses:0
active_defrag_key_hits:0
active_defrag_key_misses:0
tracking_total_keys:0
tracking_total_items:0
tracking_total_prefixes:0
unexpected_error_replies:0
total_error_replies:6
dump_payload_sanitizations:0
total_reads_processed:297924
total_writes_processed:295658
io_threaded_reads_processed:0
io_threaded_writes_processed:0

# Replication
role:master
connected_slaves:0
master_failover_state:no-failover
master_replid:fc426cf72670e6ad09221bcb9c3423a1e1fab47e
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:0
second_repl_offset:-1
repl_backlog_active:0
repl_backlog_size:1048576
repl_backlog_first_byte_offset:0
repl_backlog_histlen:0

# CPU
used_cpu_sys:428.939381
used_cpu_user:123.850311
used_cpu_sys_children:0.755309
used_cpu_user_children:0.065924
used_cpu_sys_main_thread:428.485425
used_cpu_user_main_thread:123.679341

# Modules

# Errorstats
errorstat_ERR:count=6

# Cluster
cluster_enabled:0

# Keyspace
db0:keys=6,expires=0,avg_ttl=0

Answer:

After a lot of searching, it turns out this is caused by the maximum number of open file descriptors. Every websocket connection opens a file descriptor, and every machine limits how many a single process may hold open. On Linux/Unix this limit is governed by ulimit.
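As a quick check, the process can read its own limit with the standard syscall package; a minimal sketch that works on macOS and Linux:

package main

import (
    "fmt"
    "syscall"
)

func main() {
    var rl syscall.Rlimit
    // RLIMIT_NOFILE is the per-process cap on open file descriptors,
    // and every TCP socket (websocket or Redis connection) counts against it.
    if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rl); err != nil {
        fmt.Println("getrlimit:", err)
        return
    }
    fmt.Printf("open files: soft=%d hard=%d\n", rl.Cur, rl.Max)
}

On macOS the default soft limit is often as low as 256, which is consistent with failures appearing after roughly 70 drivers when each driver holds several sockets.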

More on that in this article.
To update the ulimit on macOS, refer to this post.
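Alternatively, the process can raise its own soft limit at startup. A hedged sketch (the hard limit still caps the value, and the OS may enforce its own ceiling, e.g. kern.maxfilesperproc on macOS):

func raiseFileLimit() error {
    var rl syscall.Rlimit
    if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rl); err != nil {
        return err
    }
    rl.Cur = rl.Max // raise the soft limit as far as the hard limit allows
    return syscall.Setrlimit(syscall.RLIMIT_NOFILE, &rl)
}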