浏览代码

add system health check

shjung 2 周之前
父节点
当前提交
47f5df02bd

+ 11 - 0
its-cluster/src/main/java/com/its/common/cluster/handler/ClusterMasterHandler.java

@@ -120,6 +120,17 @@ public class ClusterMasterHandler extends ChannelInboundHandlerAdapter {
 
     @Override
     public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
+        String exceptionId = cause != null ? cause.getClass().getSimpleName() : "UnknownException"; // exception type name for the log line
+        String exceptionMsg = cause != null ? cause.getMessage() : "Unknown exceptionCaught"; // fixed placeholder text when cause is null
+        ClusterNode cluster = ctx.channel().attr(AbstractClusterConfig.CLUSTER_ATTRIBUTE_KEY).get(); // node stored on this channel; null-checked below
+        if (cluster == null) { // no node attribute on the channel: log the channel address instead
+            log.error("ClusterNodeId: {}, ClusterMasterHandler.exceptionCaught: [{}]. {}:::{}.",
+                    this.clusterConfig.getId(), ClusterUtils.getAddress(ctx.channel()), exceptionId, exceptionMsg);
+        }
+        else { // node attribute present: log the id of the node bound to this channel
+            log.error("ClusterNodeId: {}, ClusterMasterHandler.exceptionCaught: fromClusterNodeId: {}, {}:::{}.",
+                    this.clusterConfig.getId(), cluster.getId(), exceptionId, exceptionMsg);
+        }
         ctx.close();
     }
 }

+ 4 - 0
its-cluster/src/main/java/com/its/common/cluster/handler/ClusterSlaveHandler.java

@@ -28,6 +28,10 @@ public class ClusterSlaveHandler extends ChannelInboundHandlerAdapter {
 
     @Override
     public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
+        final String causeType = (cause == null) ? "UnknownException" : cause.getClass().getSimpleName(); // exception type name, or placeholder
+        final String causeText = (cause == null) ? "Unknown exceptionCaught" : cause.getMessage(); // exception message, or placeholder
+        log.error("ClusterNodeId: {}, ClusterSlaveHandler.exceptionCaught: fromClusterNodeId: {}, {}:::{}.",
+                this.slaveService.getClusterConfig().getId(), this.clusterNode.getId(), causeType, causeText); // log first; the channel is closed next
         ctx.close();
     }
 }

+ 2 - 2
its-cluster/src/main/java/com/its/common/cluster/service/AbstractClusterMasterService.java

@@ -29,7 +29,7 @@ public abstract class AbstractClusterMasterService {
     private final ThreadPoolTaskScheduler taskScheduler = new ThreadPoolTaskScheduler();
 
     @Getter
-    private final AbstractClusterConfig clusterConfig;
+    protected final AbstractClusterConfig clusterConfig;
 
     private ScheduledFuture<?> taskFuture;
     private EventLoopGroup acceptGroup;
@@ -142,7 +142,7 @@ public abstract class AbstractClusterMasterService {
         }
     }
 
-    private synchronized void electionMasterCluster() {
+    public synchronized void electionMasterCluster() {
         boolean isMaster = electionMaster();
         boolean isChanged = this.clusterConfig.isMaster() != isMaster;
         this.clusterConfig.setMaster(isMaster);

+ 21 - 9
its-cluster/src/main/java/com/its/common/cluster/service/AbstractClusterSlaveService.java

@@ -31,12 +31,12 @@ public abstract class AbstractClusterSlaveService {
     private final List<ClusterSlave> clientTasks = Collections.synchronizedList(new ArrayList<>());
 
     @Getter
-    private final AbstractClusterConfig clusterConfig;
+    protected final AbstractClusterConfig clusterConfig;
+    private final AbstractClusterMasterService masterService;
 
     private ScheduledFuture<?> taskFuture = null;
     private ClusterSlaveBootstrapFactory bootstrapFactory;
 
-
     @PostConstruct
     void init() {
         this.bootstrapFactory = new ClusterSlaveBootstrapFactory(1, 5);
@@ -226,15 +226,13 @@ public abstract class AbstractClusterSlaveService {
     public void onNotifyClusterNetworkState(ClusterNode clusterNode, boolean isActive) {
         log.info("ClusterNodeId: {}, ClusterSlaveService.onNotifyClusterNetworkState: fromClusterNodeId: {}, isNetActive; {}",
                 this.clusterConfig.getId(), clusterNode.getId(), isActive);
-        if (isActive) {
-            // 네트워크가 연결되었으면 클러스터 메시지를 전송
-            ClusterMessage clusterMsg = getClusterMessage();
-            sendSyncData(clusterNode, clusterMsg);
-        }
-        else {
+
+        if (!isActive) {
             // master 의 네트워크 상태를 확인하고 master 네트워크의 연결을 종료시킨다.
             if (clusterNode.getElectionState().getState() != ClusterNET.CLOSED) {
                 Channel channel = clusterNode.getElectionState().getChannel();
+                log.info("ClusterNodeId: {}, ClusterSlaveService.onNotifyClusterNetworkState: fromClusterNodeId: {}, Try SlaveNetwork Close. channel: {}",
+                        this.clusterConfig.getId(), clusterNode.getId(), channel);
                 if (channel != null) {
                     channel.flush();
                     channel.disconnect();
@@ -243,7 +241,21 @@ public abstract class AbstractClusterSlaveService {
                 clusterNode.getElectionState().disConnect();
             }
         }
+
+        this.masterService.electionMasterCluster();
+
+        if (isActive) {
+            // 네트워크가 연결되었으면 클러스터 메시지를 전송
+            ClusterMessage clusterMsg = getClusterMessage();
+            sendSyncData(clusterNode, clusterMsg);
+        }
+
+        if (isActive) {
+            this.masterService.onClusterChannelActive(clusterNode);
+        }
+        else {
+            this.masterService.onClusterChannelInactive(clusterNode);
+        }
     }
 
 }
-

+ 1 - 1
its-cluster/src/main/java/com/its/common/cluster/service/ClusterMasterInitializer.java

@@ -61,10 +61,10 @@ public class ClusterMasterInitializer extends ChannelInitializer<Channel> {
             log.warn("ClusterNodeId: {}, ClusterMasterInitializer.initChannel: [clusterId: {}, IP Address: {}.{}], Already Connected. Old Connection will be closed.", this.clusterConfig.getId(), clusterId, ipAddress, clientPort);
             // 이벤트 핸들러 에서 중복 처리 되지 않도록 속성 값을 제거
             channel.attr(AbstractClusterConfig.CLUSTER_ATTRIBUTE_KEY).set(null);
-            clusterNode.getElectionState().disConnect();
 
             channel.disconnect();
             channel.close();
+            clusterNode.getElectionState().disConnect();
         }
 
         clusterNode.getElectionState().connect(channel);