bug fixed: two tracker leaders occur in rare case
parent
5fcbffbf7a
commit
fb7ae7d29a
3
HISTORY
3
HISTORY
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
Version 5.04 2014-08-24
|
||||
Version 5.04 2014-09-13
|
||||
* add fastdfs.spec for build RPM on Linux
|
||||
* depend on libfastcommon
|
||||
* in multi tracker servers case, when receive higher status like
|
||||
|
|
@ -7,6 +7,7 @@ Version 5.04 2014-08-24
|
|||
the tracker adjust storage status to newer, and the storage rejoin
|
||||
to the tracker server
|
||||
* fdfs_monitor support delete empty group
|
||||
* bug fixed: two tracker leaders could occur in rare cases
|
||||
|
||||
Version 5.03 2014-08-10
|
||||
* network send and recv retry when error EINTR happens
|
||||
|
|
|
|||
|
|
@ -1009,6 +1009,94 @@ static int tracker_merge_servers(ConnectionInfo *pTrackerServer, \
|
|||
diffServers, pDiffServer - diffServers);
|
||||
}
|
||||
|
||||
/**
 * Send a header-only TRACKER_PROTO_CMD_TRACKER_NOTIFY_RESELECT_LEADER
 * request over pTrackerServer->sock and read back the reply header.
 *
 * @param pTrackerServer  tracker connection; its sock field is used as-is
 * @return 0 when the reply header announces an empty body; the code
 *         returned by tcpsenddata_nb or fdfs_recv_header when either
 *         fails; EINVAL when the reply announces a non-empty body
 */
static int _notify_reselect_tleader(ConnectionInfo *pTrackerServer)
{
	TrackerHeader request;
	int64_t body_len;
	int status;

	/* every header byte except cmd is sent as zero */
	memset(&request, 0, sizeof(request));
	request.cmd = TRACKER_PROTO_CMD_TRACKER_NOTIFY_RESELECT_LEADER;

	status = tcpsenddata_nb(pTrackerServer->sock, &request,
			sizeof(request), g_fdfs_network_timeout);
	if (status != 0)
	{
		logError("file: "__FILE__", line: %d, "
			"tracker server %s:%d, send data fail, "
			"errno: %d, error info: %s.",
			__LINE__, pTrackerServer->ip_addr,
			pTrackerServer->port,
			status, STRERROR(status));
		return status;
	}

	status = fdfs_recv_header(pTrackerServer, &body_len);
	if (status != 0)
	{
		return status;
	}

	if (body_len == 0)
	{
		return 0;
	}

	/* the reply to this command must not carry a body */
	logError("file: "__FILE__", line: %d, "
		"tracker server %s:%d, recv body length: "
		"%"PRId64" != 0", __LINE__, pTrackerServer->ip_addr,
		pTrackerServer->port, body_len);
	return EINVAL;
}
|
||||
|
||||
/**
 * Connect to one tracker server, send it the reselect-leader
 * notification, then release the connection.
 *
 * @param pTrackerServer  tracker to notify; its sock field is reset to -1
 *                        before connecting
 * @return 0 on success, otherwise the error code reported through
 *         tracker_connect_server or returned by _notify_reselect_tleader
 */
static int notify_reselect_tracker_leader(ConnectionInfo *pTrackerServer)
{
	ConnectionInfo *conn;
	int result;

	pTrackerServer->sock = -1;
	if ((conn=tracker_connect_server(pTrackerServer, &result)) == NULL)
	{
		return result;
	}

	/*
	 * Run the exchange on the connection that was actually returned, the
	 * same way fdfs_get_tracker_status does. NOTE(review): conn may be a
	 * different object than pTrackerServer (whose sock was just set to -1)
	 * -- confirm against tracker_connect_server.
	 */
	result = _notify_reselect_tleader(conn);

	/* NOTE(review): second argument is presumably a force-close flag */
	tracker_disconnect_server_ex(conn, result != 0);
	return result;
}
|
||||
|
||||
static void set_tracker_leader(const int leader_index)
|
||||
{
|
||||
int old_index;
|
||||
old_index = g_tracker_group.leader_index;
|
||||
if (old_index >= 0 && old_index != leader_index)
|
||||
{
|
||||
TrackerRunningStatus tracker_status;
|
||||
ConnectionInfo old_leader_server;
|
||||
memcpy(&old_leader_server, g_tracker_group.servers + old_index,
|
||||
sizeof(ConnectionInfo));
|
||||
if (fdfs_get_tracker_status(&old_leader_server, &tracker_status) == 0)
|
||||
{
|
||||
if (tracker_status.if_leader)
|
||||
{
|
||||
ConnectionInfo new_leader_server;
|
||||
memcpy(&new_leader_server, g_tracker_group.servers + leader_index,
|
||||
sizeof(ConnectionInfo));
|
||||
logWarning("file: "__FILE__", line: %d, "
|
||||
"two tracker leaders occur, old leader is %s:%d, "
|
||||
"new leader is %s:%d, notify to re-select "
|
||||
"tracker leader", __LINE__,
|
||||
old_leader_server.ip_addr, old_leader_server.port,
|
||||
new_leader_server.ip_addr, new_leader_server.port);
|
||||
|
||||
notify_reselect_tracker_leader(&old_leader_server);
|
||||
notify_reselect_tracker_leader(&new_leader_server);
|
||||
g_tracker_group.leader_index = -1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
g_tracker_group.leader_index = leader_index;
|
||||
}
|
||||
|
||||
static int tracker_check_response(ConnectionInfo *pTrackerServer, \
|
||||
bool *bServerPortChanged)
|
||||
{
|
||||
|
|
@ -1146,7 +1234,9 @@ static int tracker_check_response(ConnectionInfo *pTrackerServer, \
|
|||
pTrackerServer->ip_addr, pTrackerServer->port,\
|
||||
tracker_leader_ip, tracker_leader_port);
|
||||
|
||||
g_tracker_group.leader_index = leader_index;
|
||||
pthread_mutex_lock(&reporter_thread_lock);
|
||||
set_tracker_leader(leader_index);
|
||||
pthread_mutex_unlock(&reporter_thread_lock);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3740,72 +3740,6 @@ static int _tracker_mem_add_storage(FDFSGroupInfo *pGroup, \
|
|||
return result;
|
||||
}
|
||||
|
||||
int tracker_mem_get_status(ConnectionInfo *pTrackerServer, \
|
||||
TrackerRunningStatus *pStatus)
|
||||
{
|
||||
char in_buff[1 + 2 * FDFS_PROTO_PKG_LEN_SIZE];
|
||||
TrackerHeader header;
|
||||
char *pInBuff;
|
||||
ConnectionInfo *conn;
|
||||
int64_t in_bytes;
|
||||
int result;
|
||||
|
||||
pTrackerServer->sock = -1;
|
||||
if ((conn=tracker_connect_server(pTrackerServer, &result)) == NULL)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
memset(&header, 0, sizeof(header));
|
||||
header.cmd = TRACKER_PROTO_CMD_TRACKER_GET_STATUS;
|
||||
if ((result=tcpsenddata_nb(conn->sock, &header, \
|
||||
sizeof(header), g_fdfs_network_timeout)) != 0)
|
||||
{
|
||||
logError("file: "__FILE__", line: %d, " \
|
||||
"send data to tracker server %s:%d fail, " \
|
||||
"errno: %d, error info: %s", __LINE__, \
|
||||
pTrackerServer->ip_addr, \
|
||||
pTrackerServer->port, \
|
||||
result, STRERROR(result));
|
||||
|
||||
result = (result == ENOENT ? EACCES : result);
|
||||
break;
|
||||
}
|
||||
|
||||
pInBuff = in_buff;
|
||||
result = fdfs_recv_response(conn, &pInBuff, \
|
||||
sizeof(in_buff), &in_bytes);
|
||||
if (result != 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (in_bytes != sizeof(in_buff))
|
||||
{
|
||||
logError("file: "__FILE__", line: %d, " \
|
||||
"tracker server %s:%d response data " \
|
||||
"length: %"PRId64" is invalid, " \
|
||||
"expect length: %d.", __LINE__, \
|
||||
pTrackerServer->ip_addr, pTrackerServer->port, \
|
||||
in_bytes, (int)sizeof(in_buff));
|
||||
result = EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
pStatus->if_leader = *in_buff;
|
||||
pStatus->running_time = buff2long(in_buff + 1);
|
||||
pStatus->restart_interval = buff2long(in_buff + 1 + \
|
||||
FDFS_PROTO_PKG_LEN_SIZE);
|
||||
|
||||
} while (0);
|
||||
|
||||
tracker_disconnect_server_ex(conn, result != 0);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void tracker_calc_running_times(TrackerRunningStatus *pStatus)
|
||||
{
|
||||
pStatus->running_time = g_current_time - g_up_time;
|
||||
|
|
@ -4186,7 +4120,7 @@ static int tracker_mem_get_tracker_server(FDFSStorageJoinBody *pJoinBody, \
|
|||
}
|
||||
|
||||
pStatus->pTrackerServer = pTrackerServer;
|
||||
r = tracker_mem_get_status(pTrackerServer, pStatus);
|
||||
r = fdfs_get_tracker_status(pTrackerServer, pStatus);
|
||||
if (r == 0)
|
||||
{
|
||||
pStatus++;
|
||||
|
|
@ -5422,6 +5356,7 @@ void tracker_mem_find_trunk_servers()
|
|||
tracker_mem_find_trunk_server(*ppGroup, true);
|
||||
}
|
||||
}
|
||||
g_trunk_server_chg_count++;
|
||||
pthread_mutex_unlock(&mem_thread_lock);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,13 +25,6 @@
|
|||
#define TRUNK_SERVER_CHANGELOG_FILENAME "trunk_server_change.log"
|
||||
#define STORAGE_DATA_FIELD_SEPERATOR ','
|
||||
|
||||
typedef struct {
|
||||
ConnectionInfo *pTrackerServer;
|
||||
int running_time; //running seconds, more means higher weight
|
||||
int restart_interval; //restart interval, less mean higher weight
|
||||
bool if_leader; //if leader
|
||||
} TrackerRunningStatus;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
|
@ -124,9 +117,6 @@ int tracker_mem_get_storage_index(FDFSGroupInfo *pGroup, \
|
|||
|
||||
void tracker_calc_running_times(TrackerRunningStatus *pStatus);
|
||||
|
||||
int tracker_mem_get_status(ConnectionInfo *pTrackerServer, \
|
||||
TrackerRunningStatus *pStatus);
|
||||
|
||||
int tracker_save_groups();
|
||||
|
||||
void tracker_mem_find_trunk_servers();
|
||||
|
|
|
|||
|
|
@ -617,3 +617,69 @@ int fdfs_get_ini_context_from_tracker(TrackerServerGroup *pTrackerGroup, \
|
|||
return EINTR;
|
||||
}
|
||||
|
||||
int fdfs_get_tracker_status(ConnectionInfo *pTrackerServer, \
|
||||
TrackerRunningStatus *pStatus)
|
||||
{
|
||||
char in_buff[1 + 2 * FDFS_PROTO_PKG_LEN_SIZE];
|
||||
TrackerHeader header;
|
||||
char *pInBuff;
|
||||
ConnectionInfo *conn;
|
||||
int64_t in_bytes;
|
||||
int result;
|
||||
|
||||
pTrackerServer->sock = -1;
|
||||
if ((conn=tracker_connect_server(pTrackerServer, &result)) == NULL)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
memset(&header, 0, sizeof(header));
|
||||
header.cmd = TRACKER_PROTO_CMD_TRACKER_GET_STATUS;
|
||||
if ((result=tcpsenddata_nb(conn->sock, &header, \
|
||||
sizeof(header), g_fdfs_network_timeout)) != 0)
|
||||
{
|
||||
logError("file: "__FILE__", line: %d, " \
|
||||
"send data to tracker server %s:%d fail, " \
|
||||
"errno: %d, error info: %s", __LINE__, \
|
||||
pTrackerServer->ip_addr, \
|
||||
pTrackerServer->port, \
|
||||
result, STRERROR(result));
|
||||
|
||||
result = (result == ENOENT ? EACCES : result);
|
||||
break;
|
||||
}
|
||||
|
||||
pInBuff = in_buff;
|
||||
result = fdfs_recv_response(conn, &pInBuff, \
|
||||
sizeof(in_buff), &in_bytes);
|
||||
if (result != 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (in_bytes != sizeof(in_buff))
|
||||
{
|
||||
logError("file: "__FILE__", line: %d, " \
|
||||
"tracker server %s:%d response data " \
|
||||
"length: %"PRId64" is invalid, " \
|
||||
"expect length: %d.", __LINE__, \
|
||||
pTrackerServer->ip_addr, pTrackerServer->port, \
|
||||
in_bytes, (int)sizeof(in_buff));
|
||||
result = EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
pStatus->if_leader = *in_buff;
|
||||
pStatus->running_time = buff2long(in_buff + 1);
|
||||
pStatus->restart_interval = buff2long(in_buff + 1 + \
|
||||
FDFS_PROTO_PKG_LEN_SIZE);
|
||||
|
||||
} while (0);
|
||||
|
||||
tracker_disconnect_server_ex(conn, result != 0);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -36,13 +36,14 @@
|
|||
#define TRACKER_PROTO_CMD_STORAGE_GET_SERVER_ID          70  //get storage server id from tracker
#define TRACKER_PROTO_CMD_STORAGE_FETCH_STORAGE_IDS      69  //get all storage ids from tracker

/* commands 61-68: each one is defined exactly once */
#define TRACKER_PROTO_CMD_TRACKER_GET_SYS_FILES_START    61  //start of tracker get system data files
#define TRACKER_PROTO_CMD_TRACKER_GET_SYS_FILES_END      62  //end of tracker get system data files
#define TRACKER_PROTO_CMD_TRACKER_GET_ONE_SYS_FILE       63  //tracker get a system data file
#define TRACKER_PROTO_CMD_TRACKER_GET_STATUS             64  //tracker get status of other tracker
#define TRACKER_PROTO_CMD_TRACKER_PING_LEADER            65  //tracker ping leader
#define TRACKER_PROTO_CMD_TRACKER_NOTIFY_NEXT_LEADER     66  //notify next leader to other trackers
#define TRACKER_PROTO_CMD_TRACKER_COMMIT_NEXT_LEADER     67  //commit next leader to other trackers
#define TRACKER_PROTO_CMD_TRACKER_NOTIFY_RESELECT_LEADER 68  //storage notify reselect leader when split-brain

#define TRACKER_PROTO_CMD_SERVER_LIST_ONE_GROUP          90
#define TRACKER_PROTO_CMD_SERVER_LIST_ALL_GROUPS         91
|
||||
|
|
@ -276,6 +277,9 @@ int fdfs_get_ini_context_from_tracker(TrackerServerGroup *pTrackerGroup, \
|
|||
IniContext *iniContext, bool * volatile continue_flag, \
|
||||
const bool client_bind_addr, const char *bind_addr);
|
||||
|
||||
int fdfs_get_tracker_status(ConnectionInfo *pTrackerServer, \
|
||||
TrackerRunningStatus *pStatus);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -191,7 +191,7 @@ static int relationship_get_tracker_leader(TrackerRunningStatus *pTrackerStatus)
|
|||
pTrackerServer<pTrackerEnd; pTrackerServer++)
|
||||
{
|
||||
pStatus->pTrackerServer = pTrackerServer;
|
||||
r = tracker_mem_get_status(pTrackerServer, pStatus);
|
||||
r = fdfs_get_tracker_status(pTrackerServer, pStatus);
|
||||
if (r == 0)
|
||||
{
|
||||
pStatus++;
|
||||
|
|
|
|||
|
|
@ -879,7 +879,7 @@ static int tracker_deal_notify_next_leader(struct fast_task_info *pTask)
|
|||
g_tracker_leader_chg_count++;
|
||||
|
||||
logError("file: "__FILE__", line: %d, " \
|
||||
"client ip: %s, two leader occur, " \
|
||||
"client ip: %s, two leaders occur, " \
|
||||
"new leader is %s:%d", \
|
||||
__LINE__, pTask->client_ip, \
|
||||
leader.ip_addr, leader.port);
|
||||
|
|
@ -1644,6 +1644,44 @@ static int tracker_deal_ping_leader(struct fast_task_info *pTask)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int tracker_deal_reselect_leader(struct fast_task_info *pTask)
|
||||
{
|
||||
TrackerClientInfo *pClientInfo;
|
||||
|
||||
pClientInfo = (TrackerClientInfo *)pTask->arg;
|
||||
if (pTask->length - sizeof(TrackerHeader) != 0)
|
||||
{
|
||||
logError("file: "__FILE__", line: %d, " \
|
||||
"cmd=%d, client ip: %s, package size " \
|
||||
PKG_LEN_PRINTF_FORMAT" is not correct, " \
|
||||
"expect length 0", __LINE__, \
|
||||
TRACKER_PROTO_CMD_TRACKER_NOTIFY_RESELECT_LEADER, \
|
||||
pTask->client_ip, \
|
||||
pTask->length - (int)sizeof(TrackerHeader));
|
||||
pTask->length = sizeof(TrackerHeader);
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
pTask->length = sizeof(TrackerHeader);
|
||||
if (!g_if_leader_self)
|
||||
{
|
||||
logError("file: "__FILE__", line: %d, " \
|
||||
"cmd=%d, client ip: %s, i am not the leader!", \
|
||||
__LINE__, TRACKER_PROTO_CMD_TRACKER_NOTIFY_RESELECT_LEADER, \
|
||||
pTask->client_ip);
|
||||
return EOPNOTSUPP;
|
||||
}
|
||||
|
||||
g_if_leader_self = false;
|
||||
g_tracker_servers.leader_index = -1;
|
||||
g_tracker_leader_chg_count++;
|
||||
|
||||
logWarning("file: "__FILE__", line: %d, " \
|
||||
"client ip: %s, i be notified that two leaders occur, " \
|
||||
"should re-select leader", __LINE__, pTask->client_ip);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tracker_unlock_by_client(struct fast_task_info *pTask)
|
||||
{
|
||||
if (lock_by_client_count <= 0 || pTask->finish_callback == NULL)
|
||||
|
|
@ -3730,6 +3768,9 @@ int tracker_deal_task(struct fast_task_info *pTask)
|
|||
case TRACKER_PROTO_CMD_TRACKER_COMMIT_NEXT_LEADER:
|
||||
result = tracker_deal_commit_next_leader(pTask);
|
||||
break;
|
||||
case TRACKER_PROTO_CMD_TRACKER_NOTIFY_RESELECT_LEADER:
|
||||
result = tracker_deal_reselect_leader(pTask);
|
||||
break;
|
||||
default:
|
||||
logError("file: "__FILE__", line: %d, " \
|
||||
"client ip: %s, unkown cmd: %d", \
|
||||
|
|
|
|||
|
|
@ -433,5 +433,12 @@ typedef struct {
|
|||
char **paths; //file store paths
|
||||
} FDFSStorePaths;
|
||||
|
||||
typedef struct {
|
||||
ConnectionInfo *pTrackerServer;
|
||||
int running_time; //running seconds, more means higher weight
|
||||
int restart_interval; //restart interval, less mean higher weight
|
||||
bool if_leader; //if leader
|
||||
} TrackerRunningStatus;
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue