在 Workerman 中跑常规的 ws 服务,register、gateway、business 偶发会出现如下报错:Exception: getAllGatewayAddressesFromRegister fail. tcp://0.0.0.0:1236
之前没有遇到过,看报错的描述是因为连不上注册中心,导致拿不到gateway的通讯地址?请问下这是什么原因导致的?是偶发,不是致命的错误。
review代码是通过注册中心获取gateway通讯地址出错
/**
 * Fetch the communication addresses of all gateways from the register service.
 *
 * The result is cached in function-static variables and reused until it is
 * more than one second old. On a cache miss every configured register
 * address is tried in order until one TCP connection succeeds; a
 * "worker_connect" handshake line is sent and a single JSON line containing
 * an "addresses" entry is read back (read timeout: 5 seconds).
 *
 * NOTE(review): the read is blocking. A discussion around this code reports
 * that calling it from inside the register process itself fails with
 * "return false" — presumably because the register cannot answer while it
 * is blocked here; confirm before invoking from a register timer.
 *
 * @return array List of gateway addresses as returned by the register.
 * @throws Exception When no register can be reached, the handshake cannot
 *                   be encoded, the reply is missing or not valid JSON, or
 *                   the resulting address list is empty.
 */
protected static function getAllGatewayAddressesFromRegister()
{
    static $addresses_cache, $last_update;
    $time_now = time();
    $expiration_time = 1;
    $register_addresses = (array)static::$registerAddress;
    if (empty($addresses_cache) || $time_now - $last_update > $expiration_time) {
        // Pre-initialise everything the failure path reads, so an empty
        // register address list ends in the intended Exception instead of
        // "undefined variable" notices.
        $client = false;
        $register_address = '';
        $errno = 0;
        $errmsg = '';
        foreach ($register_addresses as $register_address) {
            // Silence connection warnings; failure is reported via $client.
            set_error_handler(function(){});
            $client = stream_socket_client('tcp://' . $register_address, $errno, $errmsg, static::$connectTimeout);
            restore_error_handler();
            if ($client) {
                break;
            }
        }
        if (!$client) {
            throw new Exception('Can not connect to tcp://' . $register_address . ' ' . $errmsg);
        }
        // Encode the handshake with json_encode so that a secret key
        // containing quotes or backslashes still yields valid JSON. The
        // string cast keeps the key a JSON string, as the hand-built
        // payload always sent it.
        $request = json_encode(array(
            'event'      => 'worker_connect',
            'secret_key' => (string)static::$secretKey,
        ));
        if ($request === false) {
            fclose($client);
            throw new Exception('getAllGatewayAddressesFromRegister fail. secretKey can not be JSON encoded');
        }
        fwrite($client, $request . "\n");
        stream_set_timeout($client, 5);
        $ret = fgets($client, 655350);
        // Only one reply line is needed; release the connection right away.
        fclose($client);
        if (!$ret || !$data = json_decode(trim($ret), true)) {
            throw new Exception('getAllGatewayAddressesFromRegister fail. tcp://' .
                $register_address . ' return ' . var_export($ret, true));
        }
        $last_update = $time_now;
        // A reply without "addresses" (or a non-array reply) leaves the cache
        // empty and is rejected by the check below.
        $addresses_cache = (is_array($data) && isset($data['addresses'])) ? $data['addresses'] : null;
    }
    if (!$addresses_cache) {
        throw new Exception('Gateway::getAllGatewayAddressesFromRegister() with registerAddress:' .
            json_encode(static::$registerAddress) . ' return ' . var_export($addresses_cache, true));
    }
    return $addresses_cache;
}
发下你的调用代码
就是常规的sendToGroup、Gateway::joinGroup、bindUid这种用法,没有特别的,然后日志会偶发出现上述报错:
2021-01-14 17:17:46 客户端:7f0000010b5800000006登录
2021-01-14 17:17:46 用户ID:[1]已绑定客户端ID:[7f0000010b5800000006]
2021-01-14 17:17:46 客户端[7f0000010b5800000006]发送了消息{"jsonrpc":"2.0","method":"ws:subscribe","id":35204388.75511863,"params":{"topic":"im_live_room_30593_admin","uid":1}},消息uuid是7f0000010b58000000065186636166760911202
2021-01-14 17:17:46 客户端:7f0000010b5800000006订阅频道
2021-01-14 17:17:46 客户端:[7f0000010b5800000006]订阅了频道[im_live_room_30593_admin]
Exception 'Exception' with message 'getAllGatewayAddressesFromRegister fail. tcp://0.0.0.0:1236 return false'
in /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/gatewayclient/Gateway.php:1307
Stack trace:
0 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/gatewayclient/Gateway.php(726): GatewayClient\Gateway::getAllGatewayAddressesFromRegister()
1 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/gatewayclient/Gateway.php(701): GatewayClient\Gateway::getAllGatewayAddress()
2 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/gatewayclient/Gateway.php(498): GatewayClient\Gateway::getBufferFromAllGateway(Array)
3 /home/wwwroot/live-ecs.demo1.umworks.com/modules/websocket/commands/RegisterWorker.php(77): GatewayClient\Gateway::getAllGroupIdList()
4 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/workerman/Events/Select.php(232): pengpai\modules\websocket\commands\RegisterWorker->pengpai\modules\websocket\commands{closure}()
5 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/workerman/Events/Select.php(280): Workerman\Events\Select->tick()
6 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/workerman/Worker.php(2408): Workerman\Events\Select->loop()
7 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/gateway-worker/src/Register.php(86): Workerman\Worker->run()
8 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/workerman/Worker.php(1541): GatewayWorker\Register->run()
9 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/workerman/Worker.php(1371): Workerman\Worker::forkOneWorkerForLinux(Object(GatewayWorker\Register))
10 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/workerman/Worker.php(1345): Workerman\Worker::forkWorkersForLinux()
11 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/workerman/Worker.php(1675): Workerman\Worker::forkWorkers()
12 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/workerman/Worker.php(1624): Workerman\Worker::monitorWorkersForLinux()
13 /home/wwwroot/live-ecs.demo1.umworks.com/vendor/workerman/workerman/Worker.php(548): Workerman\Worker::monitorWorkers()
14 /home/wwwroot/live-ecs.demo1.umworks.com/app/commands/DaemonController.php(134): Workerman\Worker::runAll()
15 [internal function]: pengpai\commands\DaemonController->actionService('ws', 'start')
16 /home/wwwroot/live-ecs.demo1.umworks.com/libs/yii2/base/InlineAction.php(57): call_user_func_array(Array, Array)
17 /home/wwwroot/live-ecs.demo1.umworks.com/libs/yii2/base/Controller.php(180): yii\base\InlineAction->runWithParams(Array)
18 /home/wwwroot/live-ecs.demo1.umworks.com/libs/yii2/console/Controller.php(179): yii\base\Controller->runAction('service', Array)
19 /home/wwwroot/live-ecs.demo1.umworks.com/libs/yii2/base/Module.php(528): yii\console\Controller->runAction('service', Array)
20 /home/wwwroot/live-ecs.demo1.umworks.com/libs/yii2/console/Application.php(180): yii\base\Module->runAction('daemon/service', Array)
21 /home/wwwroot/live-ecs.demo1.umworks.com/libs/yii2/console/Application.php(147): yii\console\Application->runAction('daemon/service', Array)
22 /home/wwwroot/live-ecs.demo1.umworks.com/libs/yii2/base/Application.php(387): yii\console\Application->handleRequest(Object(pengpai\components\console\Request))
23 /home/wwwroot/live-ecs.demo1.umworks.com/app/components/console/Application.php(24): yii\base\Application->run()
24 /home/wwwroot/live-ecs.demo1.umworks.com/yii(9): pengpai\components\console\Application->run()
25 {main}
Worker[30228] process terminated
worker[ws-register:30228] exit with status 256
你可以看看上面日志,下一次又是正常,看status也是正常的
Workerman version:3.5.31 PHP version:7.2.24
start time:2021-01-14 17:19:47 run 0 days 0 hours
load average: 0.37, 1, 1 event-loop:\Workerman\Events\Select
3 workers 33 processes
worker_name exit_status exit_count
ws-register 0 0
ws-gateway 0 0
ws-business 0 0
----------------------------------------------PROCESS STATUS---------------------------------------------------
pid memory listening worker_name connections send_fail timers total_request qps status
858 24M text://0.0.0.0:1236 ws-register 32 0 1 34 0 [idle]
859 24M websocket://0.0.0.0:8484 ws-gateway 17 0 1 18 0 [idle]
860 24M websocket://0.0.0.0:8484 ws-gateway 17 0 1 18 0 [idle]
863 24M websocket://0.0.0.0:8484 ws-gateway 17 0 1 18 0 [idle]
864 24M websocket://0.0.0.0:8484 ws-gateway 17 0 1 18 0 [idle]
865 24M websocket://0.0.0.0:8484 ws-gateway 18 0 1 18 0 [idle]
866 24M websocket://0.0.0.0:8484 ws-gateway 17 0 1 18 0 [idle]
867 24M websocket://0.0.0.0:8484 ws-gateway 19 0 1 18 0 [idle]
868 24M websocket://0.0.0.0:8484 ws-gateway 18 0 1 28 0 [idle]
869 24M websocket://0.0.0.0:8484 ws-gateway 17 0 1 18 0 [idle]
870 24M websocket://0.0.0.0:8484 ws-gateway 18 0 1 31 0 [idle]
871 24M websocket://0.0.0.0:8484 ws-gateway 18 0 1 18 0 [idle]
873 24M websocket://0.0.0.0:8484 ws-gateway 17 0 1 18 0 [idle]
874 24M websocket://0.0.0.0:8484 ws-gateway 18 0 1 18 0 [idle]
875 24M websocket://0.0.0.0:8484 ws-gateway 17 0 1 18 0 [idle]
876 24M websocket://0.0.0.0:8484 ws-gateway 18 0 1 18 0 [idle]
877 24M websocket://0.0.0.0:8484 ws-gateway 19 0 1 18 0 [idle]
879 28M none ws-business 17 0 0 9 0 [idle]
880 24M none ws-business 17 0 0 4 0 [idle]
881 24M none ws-business 17 0 0 5 0 [idle]
883 24M none ws-business 17 0 0 6 0 [idle]
884 24M none ws-business 17 0 0 5 0 [idle]
885 24M none ws-business 17 0 0 3 0 [idle]
886 24M none ws-business 17 0 0 4 0 [idle]
887 24M none ws-business 17 0 0 3 0 [idle]
888 24M none ws-business 17 0 0 5 0 [idle]
889 24M none ws-business 17 0 0 5 0 [idle]
890 24M none ws-business 17 0 0 5 0 [idle]
891 24M none ws-business 17 0 0 5 0 [idle]
892 24M none ws-business 17 0 0 5 0 [idle]
893 26M none ws-business 17 0 0 9 0 [idle]
895 24M none ws-business 17 0 0 5 0 [idle]
896 24M none ws-business 17 0 0 3 0 [idle]
----------------------------------------------PROCESS STATUS---------------------------------------------------
Summary 798M - - 586 0 17 426 0 [Summary]
gatewayClient么?发下调用代码
@1:
// $clientIdList = Gateway::getClientIdListByGroup($channel);
// $clientIdList = array_unique($clientIdList);
// echo "当前分组{$channel}客户端ID列表" . Json::encode($clientIdList) . "\n";
// if (!$clientIdList) {
// continue;
// }
// foreach ($clientIdList as $client_id) {
// Gateway::sendToClient($client_id, Json::encode($sendData));
// }
echo "发送数据:" . Json::encode($sendData) . "\n";
Gateway::sendToGroup($channel, Json::encode($sendData));
} catch (Exception $e) {
$this->handleError("广播直播间[{$roomId}]统计数据发生错误", ['e' => $e]);
continue;
}
GatewayClient\Gateway 的简单调用 sendToGroup ,不单是这处调用有问题,偶发的
@1: 可以帮忙看看什么问题吗 调用上没什么特别的吧 自己也没做一些中转处理之类的
设置 Gateway::$registerAddress = xxx; 的代码在哪?贴下代码。
@1: 我们在bootstrap的时候 定义了注册中心地址以及端口
\GatewayClient\Gateway::$registerAddress = $yii2->params['websocketRegisterIP'] . ':' . $yii2->params['websocketRegisterPort'];
@1:同时 在gatewayworker 以及businessworker 启动前也初始化了一次 跟上面一致
websocketRegisterIP 就是0.0.0.0 websocketRegisterPort 是1236
手册写了,\GatewayClient\Gateway::$registerAddress 不能用 0.0.0.0,要用ip。如果gatewayClient和GatewayWorker在一台服务器,就用127.0.0.1:xxx。
@1: 首先是单机,刚刚把registerAddress改成了127.0.0.1
Workerman[ws] start in DEBUG mode
------------------------------------------- WORKERMAN --------------------------------------------
Workerman version:3.5.31 PHP version:7.2.24
-------------------------------------------- WORKERS ---------------------------------------------
proto user worker listen processes status
tcp root ws-register text://127.0.0.1:1236 1 [OK]
tcp root ws-gateway websocket://0.0.0.0:8484 16 [OK]
tcp root ws-business none
gateway暴露0.0.0.0监听所有网卡,这样还是会出现同样的报错
getAllGatewayAddressesFromRegister fail. tcp://127.0.0.1:1236 return false
你好,请问这个问题有解决嘛,我也遇到了相同的问题
东哥 你好 暂时没有解决,等作者看看是什么原因吧,我定位不到什么原因,注册中心、gateway都是正常的
没人回复我吗 处理这个问题
使用gatewayClient时,
\GatewayClient\Gateway::$registerAddress
不能用0.0.0.0
。手册地址:http://doc2.workerman.net/push-in-other-project.html
科普下基础,0.0.0.0 只能在监听的时候用(作为服务端),其它时候(作为客户端)都得用实际ip。
谢谢作者大大~~~
Workerman[ws] start in DEBUG mode
------------------------------------------- WORKERMAN --------------------------------------------
Workerman version:3.5.31 PHP version:7.2.24
-------------------------------------------- WORKERS ---------------------------------------------
proto user worker listen processes status
tcp root ws-register text://127.0.0.1:1236 1 [OK]
tcp root ws-gateway websocket://0.0.0.0:8484 16 [OK]
tcp root ws-business none
gateway暴露0.0.0.0监听所有网卡,这样还是会出现同样的报错
getAllGatewayAddressesFromRegister fail. tcp://127.0.0.1:1236 return false
还是报错
gatewayClient 和 GatewayWorker 是同一台服务器么?
@1:是的 同一台 单机部署的,我们正式环境才会用分布式
@1: 单机部署,registerAddress 注册中心的地址已按照手册修正为 127.0.0.1,gateway 用 0.0.0.0 监听;现在这样也还是会出现那样的报错:tcp://127.0.0.1:1236 return false
运行 ps auxf,看下是不是运行了多个gatewayWorker。
@1:
[root@vm]# ps aux | grep Worker
root 4405 0.0 0.4 308408 33932 ? S 13:01 0:00 WorkerMan: worker process ws-business none
root 4847 0.0 0.4 308408 33936 ? S 14:04 0:00 WorkerMan: worker process ws-business none
root 5008 0.0 0.4 308408 33936 ? S 14:25 0:00 WorkerMan: worker process ws-business none
root 9327 0.0 0.4 308408 33928 ? S 13:25 0:00 WorkerMan: worker process ws-business none
root 9758 0.0 0.3 306360 31712 ? S 14:28 0:00 WorkerMan: worker process ws-business none
root 15324 0.0 0.4 308408 33920 ? S 13:08 0:00 WorkerMan: worker process ws-business none
root 15769 0.0 0.4 308408 33936 ? S 14:11 0:00 WorkerMan: worker process ws-business none
root 15924 0.0 0.3 306360 31716 ? S 14:32 0:00 WorkerMan: worker process ws-business none
root 16788 0.0 0.3 304312 29880 ? S 12:28 0:00 WorkerMan: master process start_file=/home/wwwroot/live-ecs.demo1.umworks.com/yii
root 16792 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16793 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16794 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16795 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16796 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16797 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16798 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16799 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16800 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16802 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16803 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16804 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16805 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16806 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16807 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 16808 0.0 0.3 304312 30580 ? S 12:28 0:01 WorkerMan: worker process ws-gateway websocket://0.0.0.0:8484
root 20590 0.0 0.3 306360 31716 ? S 14:35 0:00 WorkerMan: worker process ws-business none
root 26318 0.0 0.4 308408 33940 ? S 13:36 0:00 WorkerMan: worker process ws-business none
root 26448 0.0 0.4 308408 33920 ? S 13:57 0:00 WorkerMan: worker process ws-business none
root 26592 0.0 0.4 308408 33924 ? S 14:18 0:00 WorkerMan: worker process ws-business none
root 26744 0.0 0.3 306360 31716 ? S 14:39 0:00 WorkerMan: worker process ws-business none
root 28287 0.0 0.3 304312 30168 ? S 14:40 0:00 WorkerMan: worker process ws-register text://127.0.0.1:1236
root 28289 0.0 0.0 112720 972 pts/0 S+ 14:40 0:00 grep --color=auto Worker
root 30925 0.0 0.4 308408 33936 ? S 13:39 0:00 WorkerMan: worker process ws-business none
root 31063 0.0 0.4 308408 33920 ? S 14:00 0:00 WorkerMan: worker process ws-business none
root 31207 0.0 0.4 308408 33940 ? S 14:21 0:00 WorkerMan: worker process ws-business none
root 31583 0.0 0.3 304312 29668 ? S Jan14 0:00 WorkerMan: master process start_file=/home/wwwroot/live-ecs.demo1.umworks.com/yii
root 31584 0.1 0.5 318780 44736 ? S Jan14 1:41 WorkerMan: worker process live-room-stats-info none
没有
@1: 只有一个项目用到了workman,没有占用的一些问题
@1: 我们刚刚更新了正式环境 分布式部署 也存在这样的问题 这个问题就是这两周才有的 之前没有出现过的 我们的机器都是克隆的 配置都没改过
@1: 分布式部署,一台机作为注册中心,跑了register、gateway、business,其他几台机器单独跑gateway、business,连的是注册中心的IP,lanIp是本机内网IP
看到有2个 start_file=/home/wwwroot/live-ecs.demo1.umworks.com/yii 主进程,应该是开了2个gatewayWorker或者workerman程序。运行 php start.php stop,然后运行 ps auxf,kill掉重复启动的gatewayWorker进程。
已解决。是我们在注册中心(register)启动后的 onWorkerStart 里加了一个定时器导致的,具体原因不知,但加上之后就会产生上述问题。
可能是你定时器里的业务逻辑影响了register服务的正常执行,比如死循环或者长时间阻塞等
@1: 不是主进程问题,我全部kill掉,重启还是一样,我们在onWorkerStart的时候加了一个定时器
是想定时统计每个分组具体的client数 没有死循环的代码 很奇怪 把这逻辑还原就正常
你的报错应该就是这个定时器报的,register自己调用自己了,不要在register上运行业务,不支持。
@1: 谢谢作者大大~~ 我们到时单独起个worker做这样的统计