Browse Source

refactor(dhcp): improve DHCP client robustness and RFC compliance

Major improvements:
- Add configuration macros for timeouts and retry parameters
- Implement exponential backoff for DISCOVER retries (1s→2s→4s→8s)
- Add buffer capacity checks in message construction
- Add boundary validation for all option parsing
- Add HTYPE/HLEN validation in response verification

Protocol enhancements:
- Properly construct FQDN (Option 81) per RFC 4702
- Add HOSTNAME length limit (max 63 bytes)
- Support parsing multiple DNS servers
- Support T1 (Renewal) and T2 (Rebinding) options (58/59)
- Remove MSFT Vendor Class to avoid Windows-specific policies
- Remove unnecessary 4-byte alignment in DISCOVER messages

State machine optimizations:
- Fix P1 (RENEW) phase: use unicast to server
- Fix P2 (REBIND) phase: use broadcast when T2 reached
- Fix lease expiry handling: restart from DISCOVER
- Use configurable timeout macros instead of hardcoded values
- Improve retry logic with DHCP_MAX_SELECT_RETRIES

Logging improvements:
- Add detailed logs for T1/T2 parsing
- Add IP acquisition success log with formatted address
- Improve state transition logging

Note: IP address byte order kept as little-endian for internal
consistency with existing codebase conventions.
Wendal Chen 3 months ago
parent
commit
0c339a7cd4
1 changed files with 134 additions and 54 deletions
  1. 134 54
      components/ethernet/common/dhcp_client.c

+ 134 - 54
components/ethernet/common/dhcp_client.c

@@ -8,6 +8,15 @@
 #include "luat_log.h"
 #define DHCP_OPTION_138 138
 
+// DHCP超时和重传配置
+#define DHCP_MIN_LEASE_SEC 60
+#define DHCP_RETRY_BASE_MS 1000
+#define DHCP_RETRY_MAX_MS 8000
+#define DHCP_RENEW_TIMEOUT_MS 2500
+#define DHCP_REBIND_TIMEOUT_MS 2500
+#define DHCP_SELECT_ACK_TIMEOUT_MS 1900
+#define DHCP_MAX_SELECT_RETRIES 3
+
 
 //extern void DBG_Printf(const char* format, ...);
 //extern void DBG_HexPrintf(void *Data, unsigned int len);
@@ -20,6 +29,11 @@ void make_ip4_dhcp_msg_base(dhcp_client_info_t *dhcp, uint16_t flag, Buffer_Stru
 	{
 		escape_time = (luat_mcu_tick64_ms() - dhcp->last_tx_time) / 1000;
 	}
+	// 确保缓冲区至少能容纳DHCP基本消息(236) + magic(4) + 选项空间
+	if (out->MaxLen < DHCP_MSG_LEN + 4 + 64) {
+		LLOGE("buffer too small for DHCP message");
+		return;
+	}
 	BytesPut8ToBuf(out, DHCP_BOOTREQUEST);
 	BytesPut8ToBuf(out, DHCP_HTYPE_ETH);
 	BytesPut8ToBuf(out, 6);
@@ -32,8 +46,9 @@ void make_ip4_dhcp_msg_base(dhcp_client_info_t *dhcp, uint16_t flag, Buffer_Stru
 	BytesPutLe32ToBuf(out, 0);
 	BytesPutLe32ToBuf(out, 0);
 	OS_BufferWrite(out, dhcp->mac, 6);
-	memset(out->Data + out->Pos, 0, 10 + 64 + 128);
-	out->Pos += 10 + 64 + 128;
+	// 使用OS_BufferWrite确保安全,填充chaddr剩余10字节 + sname(64) + file(128)
+	uint8_t zeros[202] = {0}; // 10 + 64 + 128
+	OS_BufferWrite(out, zeros, sizeof(zeros));
 	BytesPutBe32ToBuf(out, DHCP_MAGIC_COOKIE);
 }
 
@@ -56,6 +71,8 @@ void ip4_dhcp_msg_add_ip_option(uint8_t id, uint32_t ip, Buffer_Struct *out)
 {
 	BytesPut8ToBuf(out, id);
 	BytesPut8ToBuf(out, 4);
+	// 保持与系统内部IP存储格式一致(小端序)
+	// 该系统所有IP读取都用BytesGetLe32,写入也需要用Le32保持一致
 	BytesPutLe32ToBuf(out, ip);
 }
 
@@ -95,14 +112,7 @@ void make_ip4_dhcp_discover_msg(dhcp_client_info_t *dhcp, Buffer_Struct *out)
 	ip4_dhcp_msg_add_bytes_option(DHCP_OPTION_HOSTNAME, (uint8_t*)dhcp->name, strlen(dhcp->name), out);
 	ip4_dhcp_msg_add_client_id_option(DHCP_OPTION_CLIENT_ID, (uint8_t*)dhcp->mac, 6, out);
 	BytesPut8ToBuf(out, 0xff);
-	if (out->Pos < (DHCP_MSG_LEN + 72))
-	{
-		out->Pos = (DHCP_MSG_LEN + 72);
-	}
-	else
-	{
-		out->Pos = (out->Pos + (4 - 1)) & (~(4 - 1));
-	}
+	// DHCP选项无需强制4字节对齐,避免产生冗余填充
 }
 
 void make_ip4_dhcp_select_msg(dhcp_client_info_t *dhcp, uint16_t flag, Buffer_Struct *out)
@@ -119,7 +129,8 @@ void make_ip4_dhcp_select_msg(dhcp_client_info_t *dhcp, uint16_t flag, Buffer_St
 			  //DHCP_OPTION_138,
 	  };
 	out->Pos = 0;
-	uint8_t full_name[35] = {0};
+	// 构造FQDN(Option 81): Flags(1) + RCode1(1) + RCode2(1) + Hostname
+	uint8_t full_name[96] = {0};
 	make_ip4_dhcp_msg_base(dhcp, flag, out);
 	ip4_dhcp_msg_add_integer_option(DHCP_OPTION_MESSAGE_TYPE, DHCP_OPTION_MESSAGE_TYPE_LEN, DHCP_REQUEST, out);
 	ip4_dhcp_msg_add_client_id_option(DHCP_OPTION_CLIENT_ID, (uint8_t*)dhcp->mac, 6, out);
@@ -128,10 +139,20 @@ void make_ip4_dhcp_select_msg(dhcp_client_info_t *dhcp, uint16_t flag, Buffer_St
 	{
 		ip4_dhcp_msg_add_ip_option(DHCP_OPTION_SERVER_ID, dhcp->server_ip, out);
 	}
-	ip4_dhcp_msg_add_bytes_option(DHCP_OPTION_HOSTNAME, (uint8_t*)dhcp->name, strlen(dhcp->name), out);
-	memcpy(full_name + 3, (uint8_t*)dhcp->name, strlen(dhcp->name));
-	ip4_dhcp_msg_add_bytes_option(81, full_name, strlen(dhcp->name) + 3, out);
-	ip4_dhcp_msg_add_bytes_option(60, (uint8_t *)"MSFT 5.0", 8, out);
+	// HOSTNAME(长度限制避免超过255)
+	{
+		size_t name_len = strlen(dhcp->name);
+		if (name_len > 63) name_len = 63; // 常见实现对主机名长度做限制
+		ip4_dhcp_msg_add_bytes_option(DHCP_OPTION_HOSTNAME, (uint8_t*)dhcp->name, (uint8_t)name_len, out);
+		// FQDN: Flags=0x01(服务器进行更新),RCode1=0,RCode2=0
+		full_name[0] = 0x01;
+		full_name[1] = 0x00;
+		full_name[2] = 0x00;
+		memcpy(full_name + 3, (uint8_t*)dhcp->name, name_len);
+		ip4_dhcp_msg_add_bytes_option(81, full_name, (uint8_t)(name_len + 3), out);
+	}
+	// Vendor Class已移除(原"MSFT 5.0"),避免服务器施加Windows特定策略
+	// 如有需要,可根据实际平台配置添加
 	ip4_dhcp_msg_add_bytes_option(DHCP_OPTION_PARAMETER_REQUEST_LIST, dhcp_discover_request_options, sizeof(dhcp_discover_request_options), out);
 	BytesPut8ToBuf(out, 0xff);
 }
@@ -186,6 +207,12 @@ int analyze_ip4_dhcp(dhcp_client_info_t *dhcp, Buffer_Struct *in)
 		LLOGD("cookie error");
 		return -2;
 	}
+	// 基本类型校验:HTYPE=ETH(1),HLEN=6
+	if (in->Data[1] != DHCP_HTYPE_ETH || in->Data[2] != 6)
+	{
+		LLOGD("htype/hlen error %d/%d", in->Data[1], in->Data[2]);
+		return -1;
+	}
 
 	if (BytesGetBe32(&in->Data[4]) != dhcp->xid)
 	{
@@ -211,7 +238,26 @@ int analyze_ip4_dhcp(dhcp_client_info_t *dhcp, Buffer_Struct *in)
 	while (in->Pos < in->MaxLen)
 	{
 __CHECK:
-		switch(in->Data[in->Pos])
+		// 边界检查,确保能读取type/len
+		if (in->Pos + 1 >= in->MaxLen)
+			break;
+		uint8_t opt = in->Data[in->Pos];
+		uint8_t len = in->Data[in->Pos + 1];
+		if (opt == DHCP_OPTION_PAD)
+		{
+			in->Pos++;
+			goto __CHECK;
+		}
+		if (opt == DHCP_OPTION_END)
+		{
+			return ack;
+		}
+		if (in->Pos + 2 + len > in->MaxLen)
+		{
+			LLOGW("option overflow opt=%d len=%d pos=%d", opt, len, in->Pos);
+			break;
+		}
+		switch(opt)
 		{
 		case DHCP_OPTION_MESSAGE_TYPE:
 			ack = in->Data[in->Pos + 2];
@@ -223,14 +269,15 @@ __CHECK:
 			if (DHCP_ACK == ack)
 			{
 				dhcp->lease_time = BytesGetBe32(&in->Data[in->Pos + 2]);
-				if (dhcp->lease_time < 60)
+				if (dhcp->lease_time < DHCP_MIN_LEASE_SEC)
 				{
-					LLOGW("lease time too short %d, set to 60", dhcp->lease_time);
-					dhcp->lease_time = 60; // 最小租约时间为60秒
+					LLOGW("lease time too short %d, set to %d", dhcp->lease_time, DHCP_MIN_LEASE_SEC);
+					dhcp->lease_time = DHCP_MIN_LEASE_SEC;
 				}
 				lease_time = dhcp->lease_time;
 				lease_time *= 1000;
 				dhcp->lease_end_time = luat_mcu_tick64_ms() + lease_time;
+				// 默认按比例,若后续解析到T1/T2会覆盖
 				dhcp->lease_p1_time = dhcp->lease_end_time - (lease_time >> 1);
 				dhcp->lease_p2_time = dhcp->lease_end_time - (lease_time >> 3);
 			}
@@ -243,20 +290,40 @@ __CHECK:
 			dhcp->gateway = BytesGetLe32(&in->Data[in->Pos + 2]);
 			break;
 		case DHCP_OPTION_DNS_SERVER:
-			dhcp->dns_server[0] = BytesGetLe32(&in->Data[in->Pos + 2]);
-			dhcp->dns_server[1] = (in->Data[in->Pos + 1] >= 8)?BytesGetLe32(&in->Data[in->Pos + 6]):0;
+			{
+				// 解析所有DNS,每4字节一个地址
+				uint8_t count = len / 4;
+				for (uint8_t i = 0; i < count && i < 2; i++)
+				{
+					uint32_t addr = BytesGetLe32(&in->Data[in->Pos + 2 + i * 4]);
+					dhcp->dns_server[i] = addr;
+				}
+				for (uint8_t i = count; i < 2; i++) dhcp->dns_server[i] = 0;
+			}
 			break;
-		case DHCP_OPTION_PAD:
-			in->Pos++;
-			goto __CHECK;
+		case 58: // Renewal Time (T1)
+			if (DHCP_ACK == ack && len == 4)
+			{
+				uint64_t t1 = BytesGetBe32(&in->Data[in->Pos + 2]);
+				if (t1 < DHCP_MIN_LEASE_SEC) t1 = DHCP_MIN_LEASE_SEC;
+				dhcp->lease_p1_time = luat_mcu_tick64_ms() + t1 * 1000;
+				LLOGD("T1(Renew)=%llu sec", t1);
+			}
+			break;
+		case 59: // Rebinding Time (T2)
+			if (DHCP_ACK == ack && len == 4)
+			{
+				uint64_t t2 = BytesGetBe32(&in->Data[in->Pos + 2]);
+				if (t2 < DHCP_MIN_LEASE_SEC) t2 = DHCP_MIN_LEASE_SEC;
+				dhcp->lease_p2_time = luat_mcu_tick64_ms() + t2 * 1000;
+				LLOGD("T2(Rebind)=%llu sec", t2);
+			}
 			break;
-		case DHCP_OPTION_END:
-			return ack;
 		default:
 			//LLOGD("jump %d,%d", in->Data[in->Pos], in->Data[in->Pos+1]);
 			break;
 		}
-		in->Pos += 2 + in->Data[in->Pos+1];
+		in->Pos += 2 + len;
 	}
 	return ack;
 }
@@ -309,41 +376,51 @@ int ip4_dhcp_run(dhcp_client_info_t *dhcp, Buffer_Struct *in, Buffer_Struct *out
 	case DHCP_STATE_WAIT_LEASE_P1_ACK:
 		if (in && (result == DHCP_ACK))
 		{
-			LLOGD("lease p1 require ip ok");
+			LLOGD("lease p1 renew ip ok");
 			dhcp->state = DHCP_STATE_WAIT_LEASE_P1;
 			break;
 		}
-		if (luat_mcu_tick64_ms() >= (dhcp->last_tx_time + 2500))
+		if (tnow >= (dhcp->last_tx_time + DHCP_RENEW_TIMEOUT_MS))
 		{
-			LLOGD("lease p1 require ip long time no ack");
+			LLOGD("lease p1 renew timeout, enter rebind phase");
 			dhcp->state = DHCP_STATE_WAIT_LEASE_P2;
 		}
 		break;
 	case DHCP_STATE_WAIT_LEASE_P2:
 		if (tnow >= dhcp->lease_p2_time)
 		{
-			dhcp->state = DHCP_STATE_WAIT_SELECT_ACK;
+			LLOGD("lease p2 rebind time reached, broadcast request");
+			// Rebind阶段使用广播,不指定server_ip
+			flag = 0x8000;
+			*remote_ip = 0xffffffff;
+			dhcp->state = DHCP_STATE_WAIT_LEASE_P2_ACK;
 			goto DHCP_NEED_REQUIRE;
 		}
 		break;
 	case DHCP_STATE_WAIT_LEASE_P2_ACK:
 		if (in && (result == DHCP_ACK))
 		{
-			LLOGD("lease p2 require ip ok");
+			LLOGD("lease p2 rebind ip ok");
 			dhcp->state = DHCP_STATE_WAIT_LEASE_P1;
 			break;
 		}
-		if (tnow >= (dhcp->last_tx_time + 2500))
+		if (tnow >= (dhcp->last_tx_time + DHCP_REBIND_TIMEOUT_MS))
 		{
-			LLOGD("lease p2 require ip long time no ack");
+			LLOGD("lease p2 rebind timeout, wait for lease expiry");
 			dhcp->state = DHCP_STATE_WAIT_LEASE_END;
 		}
 		break;
 	case DHCP_STATE_WAIT_LEASE_END:
 		if (tnow >= dhcp->lease_end_time)
 		{
-			dhcp->state = DHCP_STATE_WAIT_SELECT_ACK;
-			goto DHCP_NEED_REQUIRE;
+			LLOGD("lease expired, restart from discover");
+			dhcp->state = DHCP_STATE_DISCOVER;
+			dhcp->temp_ip = 0;
+			dhcp->server_ip = 0;
+			dhcp->ip = 0;
+			dhcp->last_tx_time = 0;
+			dhcp->discover_cnt = 0;
+			// 下一轮循环会触发DISCOVER
 		}
 		break;
 //	case DHCP_STATE_WAIT_REQUIRE_ACK:
@@ -373,42 +450,45 @@ int ip4_dhcp_run(dhcp_client_info_t *dhcp, Buffer_Struct *in, Buffer_Struct *out
 	case DHCP_STATE_WAIT_OFFER:
 		if (in && (result == DHCP_OFFER))
 		{
-			LLOGD("select offer, wait ack");
+			LLOGD("got offer, send request");
 			dhcp->state = DHCP_STATE_WAIT_SELECT_ACK;
 			dhcp->wait_selec_ack_cnt = 0;
 			goto DHCP_NEED_REQUIRE;
 		}
-		if (tnow >= (dhcp->last_tx_time + (dhcp->discover_cnt * 500) + 900))
+		// 指数退避:1s, 2s, 4s, 8s...
 		{
-			LLOGD("long time no offer, resend");
-			dhcp->discover_cnt++;
-			OS_ReInitBuffer(out, 512);
-			make_ip4_dhcp_discover_msg(dhcp, out);
-			dhcp->last_tx_time = luat_mcu_tick64_ms();
+			uint32_t backoff = DHCP_RETRY_BASE_MS << dhcp->discover_cnt;
+			if (backoff > DHCP_RETRY_MAX_MS) backoff = DHCP_RETRY_MAX_MS;
+			if (tnow >= (dhcp->last_tx_time + backoff))
+			{
+				LLOGD("no offer after %ums, resend discover (retry %d)", backoff, dhcp->discover_cnt);
+				dhcp->discover_cnt++;
+				OS_ReInitBuffer(out, 512);
+				make_ip4_dhcp_discover_msg(dhcp, out);
+				dhcp->last_tx_time = luat_mcu_tick64_ms();
+			}
 		}
 		break;
 	case DHCP_STATE_WAIT_SELECT_ACK:
 		if (in && (result == DHCP_ACK))
 		{
-//			LLOGD("need check ip %x,%x,%x,%x", dhcp->temp_ip, dhcp->submask, dhcp->gateway, dhcp->server_ip);
 			dhcp->ip = dhcp->temp_ip;
 			dhcp->state = DHCP_STATE_CHECK;
-
 			dhcp->weak_temp_ip = 0;
 			dhcp->weak_server_ip = 0;
-
-			LLOGD("DHCP get ip ready");
+			LLOGD("DHCP acquired IP %d.%d.%d.%d", 
+				(dhcp->ip) & 0xFF, (dhcp->ip >> 8) & 0xFF, 
+				(dhcp->ip >> 16) & 0xFF, (dhcp->ip >> 24) & 0xFF);
 			break;
 		}
-		if (dhcp->wait_selec_ack_cnt > 3)
+		if (dhcp->wait_selec_ack_cnt >= DHCP_MAX_SELECT_RETRIES)
 		{
-			LLOGD("select ip long time no ack");
+			LLOGD("select request timeout after %d retries", dhcp->wait_selec_ack_cnt);
 			if ((dhcp->weak_temp_ip == dhcp->temp_ip) && (dhcp->weak_server_ip == dhcp->server_ip))
 			{
-				LLOGD("get same ip and server, maybe dhcp server error, use old ip");
+				LLOGW("same offer repeated, assume server issue, accept IP");
 				dhcp->ip = dhcp->temp_ip;
 				dhcp->state = DHCP_STATE_CHECK;
-
 				dhcp->weak_temp_ip = 0;
 				dhcp->weak_server_ip = 0;
 				break;
@@ -418,6 +498,7 @@ int ip4_dhcp_run(dhcp_client_info_t *dhcp, Buffer_Struct *in, Buffer_Struct *out
 				dhcp->weak_temp_ip = dhcp->temp_ip;
 				dhcp->weak_server_ip = dhcp->server_ip;
 			}
+			// 重新DISCOVER
 			OS_ReInitBuffer(out, 512);
 			make_ip4_dhcp_discover_msg(dhcp, out);
 			dhcp->last_tx_time = luat_mcu_tick64_ms();
@@ -426,14 +507,13 @@ int ip4_dhcp_run(dhcp_client_info_t *dhcp, Buffer_Struct *in, Buffer_Struct *out
 		}
 		else
 		{
-			if (luat_mcu_tick64_ms() >= (dhcp->last_tx_time + 1900 * (dhcp->wait_selec_ack_cnt + 1)))
+			if (tnow >= (dhcp->last_tx_time + DHCP_SELECT_ACK_TIMEOUT_MS * (dhcp->wait_selec_ack_cnt + 1)))
 			{
 				dhcp->wait_selec_ack_cnt++;
-				LLOGD("select ip no ack,resend %d", dhcp->wait_selec_ack_cnt);
+				LLOGD("select request no ack, retry %d", dhcp->wait_selec_ack_cnt);
 				goto DHCP_NEED_REQUIRE;
 			}
 		}
-
 		break;
 	case DHCP_STATE_REQUIRE:
 	case DHCP_STATE_SELECT: