From 0e12267ff2b2b5a95fa243f69f9dbe466a19d2ae Mon Sep 17 00:00:00 2001 From: Affaan Mustafa Date: Mon, 11 May 2026 07:51:08 -0400 Subject: [PATCH] docs: salvage network operations patterns --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- AGENTS.md | 6 +- README.md | 12 +- README.zh-CN.md | 2 +- agents/network-config-reviewer.md | 97 ++++++++++ agents/network-troubleshooter.md | 119 ++++++++++++ docs/zh-CN/AGENTS.md | 6 +- docs/zh-CN/README.md | 10 +- manifests/install-modules.json | 5 +- package.json | 3 + skills/homelab-network-setup/SKILL.md | 129 +++++++++++++ skills/network-config-validation/SKILL.md | 210 ++++++++++++++++++++++ skills/network-interface-health/SKILL.md | 152 ++++++++++++++++ 14 files changed, 734 insertions(+), 21 deletions(-) create mode 100644 agents/network-config-reviewer.md create mode 100644 agents/network-troubleshooter.md create mode 100644 skills/homelab-network-setup/SKILL.md create mode 100644 skills/network-config-validation/SKILL.md create mode 100644 skills/network-interface-health/SKILL.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 3fbabfea..982fdfea 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,7 +11,7 @@ { "name": "ecc", "source": "./", - "description": "The most comprehensive Claude Code plugin — 51 agents, 189 skills, 69 legacy command shims, selective install profiles, and production-ready hooks for TDD, security scanning, code review, and continuous learning", + "description": "The most comprehensive Claude Code plugin — 53 agents, 192 skills, 69 legacy command shims, selective install profiles, and production-ready hooks for TDD, security scanning, code review, and continuous learning", "version": "2.0.0-rc.1", "author": { "name": "Affaan Mustafa", diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index e1ee3c3f..21b06901 100644 --- a/.claude-plugin/plugin.json +++ 
b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "ecc", "version": "2.0.0-rc.1", - "description": "Battle-tested Claude Code plugin for engineering teams — 51 agents, 189 skills, 69 legacy command shims, production-ready hooks, and selective install workflows evolved through continuous real-world use", + "description": "Battle-tested Claude Code plugin for engineering teams — 53 agents, 192 skills, 69 legacy command shims, production-ready hooks, and selective install workflows evolved through continuous real-world use", "author": { "name": "Affaan Mustafa", "url": "https://x.com/affaanmustafa" diff --git a/AGENTS.md b/AGENTS.md index 6c66ad5d..943f79eb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,6 +1,6 @@ # Everything Claude Code (ECC) — Agent Instructions -This is a **production-ready AI coding plugin** providing 51 specialized agents, 189 skills, 69 commands, and automated hook workflows for software development. +This is a **production-ready AI coding plugin** providing 53 specialized agents, 192 skills, 69 commands, and automated hook workflows for software development. **Version:** 2.0.0-rc.1 @@ -145,8 +145,8 @@ Troubleshoot failures: check test isolation → verify mocks → fix implementat ## Project Structure ``` -agents/ — 51 specialized subagents -skills/ — 189 workflow skills and domain knowledge +agents/ — 53 specialized subagents +skills/ — 192 workflow skills and domain knowledge commands/ — 69 slash commands hooks/ — Trigger-based automations rules/ — Always-follow guidelines (common + per-language) diff --git a/README.md b/README.md index fb7329a3..f28dd79d 100644 --- a/README.md +++ b/README.md @@ -350,7 +350,7 @@ If you stacked methods, clean up in this order: /plugin list ecc@ecc ``` -**That's it!** You now have access to 51 agents, 189 skills, and 69 legacy command shims. +**That's it!** You now have access to 53 agents, 192 skills, and 69 legacy command shims. 
### Dashboard GUI @@ -448,7 +448,7 @@ everything-claude-code/ | |-- plugin.json # Plugin metadata and component paths | |-- marketplace.json # Marketplace catalog for /plugin marketplace add | -|-- agents/ # 51 specialized subagents for delegation +|-- agents/ # 53 specialized subagents for delegation | |-- planner.md # Feature implementation planning | |-- architect.md # System design decisions | |-- tdd-guide.md # Test-driven development @@ -1336,9 +1336,9 @@ The configuration is automatically detected from `.opencode/opencode.json`. | Feature | Claude Code | OpenCode | Status | |---------|-------------|----------|--------| -| Agents | PASS: 51 agents | PASS: 12 agents | **Claude Code leads** | +| Agents | PASS: 53 agents | PASS: 12 agents | **Claude Code leads** | | Commands | PASS: 69 commands | PASS: 31 commands | **Claude Code leads** | -| Skills | PASS: 189 skills | PASS: 37 skills | **Claude Code leads** | +| Skills | PASS: 192 skills | PASS: 37 skills | **Claude Code leads** | | Hooks | PASS: 8 event types | PASS: 11 events | **OpenCode has more!** | | Rules | PASS: 29 rules | PASS: 13 instructions | **Claude Code leads** | | MCP Servers | PASS: 14 servers | PASS: Full | **Full parity** | @@ -1441,9 +1441,9 @@ ECC is the **first plugin to maximize every major AI coding tool**. 
Here's how e | Feature | Claude Code | Cursor IDE | Codex CLI | OpenCode | |---------|------------|------------|-----------|----------| -| **Agents** | 51 | Shared (AGENTS.md) | Shared (AGENTS.md) | 12 | +| **Agents** | 53 | Shared (AGENTS.md) | Shared (AGENTS.md) | 12 | | **Commands** | 69 | Shared | Instruction-based | 31 | -| **Skills** | 189 | Shared | 10 (native format) | 37 | +| **Skills** | 192 | Shared | 10 (native format) | 37 | | **Hook Events** | 8 types | 15 types | None yet | 11 types | | **Hook Scripts** | 20+ scripts | 16 scripts (DRY adapter) | N/A | Plugin hooks | | **Rules** | 34 (common + lang) | 34 (YAML frontmatter) | Instruction-based | 13 instructions | diff --git a/README.zh-CN.md b/README.zh-CN.md index 0a52684b..95eeb68c 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -160,7 +160,7 @@ Copy-Item -Recurse rules/typescript "$HOME/.claude/rules/" /plugin list ecc@ecc ``` -**完成!** 你现在可以使用 51 个代理、189 个技能和 69 个命令。 +**完成!** 你现在可以使用 53 个代理、192 个技能和 69 个命令。 ### multi-* 命令需要额外配置 diff --git a/agents/network-config-reviewer.md b/agents/network-config-reviewer.md new file mode 100644 index 00000000..0a362c05 --- /dev/null +++ b/agents/network-config-reviewer.md @@ -0,0 +1,97 @@ +--- +name: network-config-reviewer +description: Reviews router and switch configurations for security, correctness, stale references, risky change-window commands, and missing operational guardrails. +tools: ["Read", "Grep"] +model: sonnet +--- + +You are a senior network configuration reviewer. You audit proposed or existing +router and switch configuration and return prioritized findings with evidence. + +## Scope + +- Cisco IOS and IOS-XE style running configuration. +- Interface, VLAN, ACL, VTY, AAA, SNMP, NTP, logging, routing, and banner blocks. +- Proposed change snippets that will be pasted into a change window. +- Read-only review only. Do not apply configuration or suggest live testing that + removes protections. + +## Review Workflow + +1. 
Identify the device role, platform, and change intent if they are present. +2. Parse configuration sections: interfaces, routing, ACLs, line vty, AAA, SNMP, + logging, NTP, and banners. +3. Check the proposed change first, then adjacent existing config needed to prove + a finding. +4. Report only findings with enough evidence to act on. +5. Separate hard blockers from best-practice improvements. + +## Severity Guide + +### Critical + +- Plaintext or default credentials. +- `snmp-server community public` or `private`, especially with write access. +- Telnet-only management or internet-facing VTY access with no source restriction. +- Proposed destructive commands such as `reload`, `erase`, `format`, broad + `no interface`, or removing an entire routing process without rollback context. + +### High + +- SSH v1, weak enable password usage, missing AAA where the environment expects it. +- ACLs referenced by interfaces or routing policy but not defined. +- Route-maps, prefix-lists, or community-lists referenced by BGP but not defined. +- Subnet overlaps or duplicate interface IPs. + +### Medium + +- No NTP, timestamps, remote logging, or saved rollback evidence. +- Management-plane access not limited to a management subnet. +- Missing descriptions on important uplinks, trunks, or routed links. + +### Low + +- Naming, comment, and documentation cleanup. +- Suggested monitoring additions that are not required for the change to be safe. + +## Output Format + +```text +## Network Configuration Review: + +### Critical +[CRITICAL-1] +File/section: +Evidence: +Risk: +Fix: + +### High +... + +### Summary +| Severity | Count | +| --- | ---: | +| Critical | 0 | +| High | 0 | +| Medium | 0 | +| Low | 0 | + +Verdict: PASS | WARNING | BLOCK +Tests checked: +Residual risk: +``` + +Use `BLOCK` for any Critical finding or proposed destructive change without a +rollback plan. Use `WARNING` for High or Medium findings that do not block a +maintenance window by themselves. 
Use `PASS` only when no actionable findings are +present. + +## Safety Rules + +- Do not recommend removing ACLs, disabling firewall rules, or opening VTY access + as a diagnostic shortcut. +- Prefer read-only confirmation commands such as `show running-config`, + `show ip access-lists`, `show ip route`, `show logging`, and `show interfaces`. +- If a command changes device state, label it as a proposed fix and require a + maintenance window, rollback plan, and verification step. diff --git a/agents/network-troubleshooter.md b/agents/network-troubleshooter.md new file mode 100644 index 00000000..d0f7610b --- /dev/null +++ b/agents/network-troubleshooter.md @@ -0,0 +1,119 @@ +--- +name: network-troubleshooter +description: Diagnoses network connectivity, routing, DNS, interface, and policy symptoms with a read-only OSI-layer workflow and evidence-backed root cause summary. +tools: ["Read", "Bash", "Grep"] +model: sonnet +--- + +You are a senior network troubleshooting agent. You diagnose symptoms +systematically and produce a concise root cause summary with evidence. + +## Scope + +- Connectivity, packet loss, slow links, DNS failures, route reachability, BGP + neighbor state, VLAN reachability, and ACL/firewall symptoms. +- Router, switch, Linux host, and homelab environments. +- Read-only diagnosis. Do not apply configuration changes while diagnosing. + +## Workflow + +1. Characterize the symptom. + - What fails? + - Who is affected? + - When did it start? + - What changed recently? +2. Pick the starting layer, then work downward or upward as evidence requires. +3. Ask for missing command output only when it changes the diagnosis. +4. Confirm that the suspected cause explains all observed symptoms. +5. End with a root cause summary and verification plan. + +## Layer Checks + +### Layer 1 and 2 + +Use for link-down, packet loss, CRCs, drops, and VLAN mismatch symptoms. 
+ +```text +show interfaces status +show interfaces <interface> +show vlan brief +show spanning-tree vlan <vlan-id> +``` + +Look for down/down state, CRC counters increasing, duplex mismatch, wrong access +VLAN, blocked spanning-tree state, or trunk VLANs missing from the allowed list. + +### Layer 3 + +Use for gateway, routing, and reachability symptoms. + +```text +show ip interface brief +show ip route +ping <destination> source <source-interface> +traceroute <destination> source <source-interface> +``` + +Look for missing connected routes, wrong next hop, asymmetric routing, stale static +routes, or a default route that points to the wrong upstream. + +### DNS + +Use when IP connectivity works but names fail. + +```text +dig <hostname> @<local-dns-server> +dig <hostname> @<public-dns-server> +nslookup <hostname> +``` + +If public DNS works but local DNS fails, focus on the resolver, DHCP DNS option, +firewall rules to UDP/TCP 53, or local zones. + +### Policy And Firewall + +Use read-only counters and logs. Do not remove policy to test. + +```text +show ip access-lists +show running-config interface <interface> +show logging | include <source-ip>|ACL|DENY|DROP +``` + +If a deny counter increments for the failing flow, propose a narrow allow rule and +verification step instead of disabling the ACL. + +## Output Format + +```text +## Diagnosis: <short title> + +Symptom: +Affected scope: +Layer: + +Evidence: +- `<command>` -> <observation> +- `<command>` -> <observation> + +Root cause: + + +Recommended fix: +1. +2. + +Verification: +- `<command>` should show <expected result> + +Residual risk: + +``` + +## Guardrails + +- Prefer evidence over guesses. +- Never recommend temporarily removing ACLs, firewall rules, authentication, or + management-plane restrictions. +- If a live command changes state, label it clearly as a remediation step, not a + diagnostic command. 
diff --git a/docs/zh-CN/AGENTS.md b/docs/zh-CN/AGENTS.md index e0fd304a..d7b6be41 100644 --- a/docs/zh-CN/AGENTS.md +++ b/docs/zh-CN/AGENTS.md @@ -1,6 +1,6 @@ # Everything Claude Code (ECC) — 智能体指令 -这是一个**生产就绪的 AI 编码插件**,提供 51 个专业代理、189 项技能、69 条命令以及自动化钩子工作流,用于软件开发。 +这是一个**生产就绪的 AI 编码插件**,提供 53 个专业代理、192 项技能、69 条命令以及自动化钩子工作流,用于软件开发。 **版本:** 2.0.0-rc.1 @@ -146,8 +146,8 @@ ## 项目结构 ``` -agents/ — 51 个专业子代理 -skills/ — 189 个工作流技能和领域知识 +agents/ — 53 个专业子代理 +skills/ — 192 个工作流技能和领域知识 commands/ — 69 个斜杠命令 hooks/ — 基于触发的自动化 rules/ — 始终遵循的指导方针(通用 + 每种语言) diff --git a/docs/zh-CN/README.md b/docs/zh-CN/README.md index da6f4574..59a9219b 100644 --- a/docs/zh-CN/README.md +++ b/docs/zh-CN/README.md @@ -224,7 +224,7 @@ Copy-Item -Recurse rules/typescript "$HOME/.claude/rules/" /plugin list ecc@ecc ``` -**搞定!** 你现在可以使用 51 个智能体、189 项技能和 69 个命令了。 +**搞定!** 你现在可以使用 53 个智能体、192 项技能和 69 个命令了。 *** @@ -1132,9 +1132,9 @@ opencode | 功能特性 | Claude Code | OpenCode | 状态 | |---------|-------------|----------|--------| -| 智能体 | PASS: 51 个 | PASS: 12 个 | **Claude Code 领先** | +| 智能体 | PASS: 53 个 | PASS: 12 个 | **Claude Code 领先** | | 命令 | PASS: 69 个 | PASS: 31 个 | **Claude Code 领先** | -| 技能 | PASS: 189 项 | PASS: 37 项 | **Claude Code 领先** | +| 技能 | PASS: 192 项 | PASS: 37 项 | **Claude Code 领先** | | 钩子 | PASS: 8 种事件类型 | PASS: 11 种事件 | **OpenCode 更多!** | | 规则 | PASS: 29 条 | PASS: 13 条指令 | **Claude Code 领先** | | MCP 服务器 | PASS: 14 个 | PASS: 完整 | **完全对等** | @@ -1240,9 +1240,9 @@ ECC 是**第一个最大化利用每个主要 AI 编码工具的插件**。以 | 功能特性 | Claude Code | Cursor IDE | Codex CLI | OpenCode | |---------|------------|------------|-----------|----------| -| **智能体** | 51 | 共享 (AGENTS.md) | 共享 (AGENTS.md) | 12 | +| **智能体** | 53 | 共享 (AGENTS.md) | 共享 (AGENTS.md) | 12 | | **命令** | 69 | 共享 | 基于指令 | 31 | -| **技能** | 189 | 共享 | 10 (原生格式) | 37 | +| **技能** | 192 | 共享 | 10 (原生格式) | 37 | | **钩子事件** | 8 种类型 | 15 种类型 | 暂无 | 11 种类型 | | **钩子脚本** | 20+ 个脚本 | 16 个脚本 (DRY 适配器) | N/A | 插件钩子 | | **规则** | 34 (通用 + 语言) | 34 (YAML 前页) | 基于指令 | 13 条指令 
| diff --git a/manifests/install-modules.json b/manifests/install-modules.json index bf43984d..a364c1c5 100644 --- a/manifests/install-modules.json +++ b/manifests/install-modules.json @@ -510,7 +510,10 @@ "description": "Deployment workflows, Docker patterns, and infrastructure skills.", "paths": [ "skills/deployment-patterns", - "skills/docker-patterns" + "skills/docker-patterns", + "skills/homelab-network-setup", + "skills/network-config-validation", + "skills/network-interface-health" ], "targets": [ "claude", diff --git a/package.json b/package.json index 8746d7db..c8c823b0 100644 --- a/package.json +++ b/package.json @@ -157,6 +157,7 @@ "skills/google-workspace-ops/", "skills/healthcare-phi-compliance/", "skills/hipaa-compliance/", + "skills/homelab-network-setup/", "skills/hookify-rules/", "skills/inventory-demand-planning/", "skills/investor-materials/", @@ -186,6 +187,8 @@ "skills/messages-ops/", "skills/nanoclaw-repl/", "skills/nestjs-patterns/", + "skills/network-config-validation/", + "skills/network-interface-health/", "skills/nodejs-keccak256/", "skills/nutrient-document-processing/", "skills/perl-patterns/", diff --git a/skills/homelab-network-setup/SKILL.md b/skills/homelab-network-setup/SKILL.md new file mode 100644 index 00000000..5a023349 --- /dev/null +++ b/skills/homelab-network-setup/SKILL.md @@ -0,0 +1,129 @@ +--- +name: homelab-network-setup +description: Practical home and homelab network planning for gateways, switches, access points, IP ranges, DHCP reservations, DNS, cabling, and common beginner mistakes. +origin: community +--- + +# Homelab Network Setup + +Use this skill to design a home or small-lab network that can grow without +needing a full rebuild. + +## When to Use + +- Planning a new home network or redesigning an ISP-router-only setup. +- Choosing gateway, switch, and access point roles. +- Designing IP ranges, DHCP scopes, static reservations, and DNS. +- Preparing for future VLANs, Pi-hole, NAS, lab servers, or VPN access. 
+- Troubleshooting a new network that has double NAT, unstable Wi-Fi, or changing + server addresses. + +## How It Works + +Start by separating device roles: + +```text +Internet + | +Modem or ONT + | +Gateway or router NAT, firewall, DHCP, DNS, inter-VLAN routing + | +Managed switch wired clients, AP uplinks, optional VLAN trunks + | +Access points Wi-Fi only; ideally wired backhaul +Servers and NAS stable addresses, DNS names, monitoring +Clients and IoT DHCP pools, isolated later if VLANs are available +``` + +Pick a gateway that matches the operator, not just the feature checklist: + +| Option | Best fit | Notes | +| --- | --- | --- | +| ISP router | Basic internet only | Limited control and often poor VLAN support | +| UniFi gateway | Managed home network | Good UI, ecosystem lock-in | +| OPNsense or pfSense | Flexible homelab | Strong VLAN, firewall, VPN, and DNS control | +| MikroTik | Advanced network users | Powerful, but easy to misconfigure | +| Linux router | Tinkerers | Document rollback before using as primary gateway | + +## IP Plan + +Avoid the most common default, `192.168.1.0/24`, when you expect to use VPNs. +It often conflicts with hotels, offices, and ISP routers. + +```text +Example small homelab plan: + +192.168.10.0/24 trusted clients +192.168.20.0/24 IoT and media devices +192.168.30.0/24 servers and NAS +192.168.40.0/24 guest Wi-Fi +192.168.99.0/24 network management + +Gateway convention: .1 +Infrastructure reservations: .2 through .49 +Dynamic DHCP pool: .50 through .240 +Spare room: .241 through .254 +``` + +Use `home.arpa` for local names. It is reserved for home networks and avoids the +leakage/conflict problems of ad hoc names like `home.lan`. + +```text +nas.home.arpa +pihole.home.arpa +gateway.home.arpa +switch-01.home.arpa +``` + +## DHCP And DNS + +- Use DHCP reservations for anything you SSH into, bookmark, monitor, or expose + as a service. +- Hand out the gateway as DNS until a local resolver is intentionally deployed. 
+- If using Pi-hole or another DNS filter, give it a reservation first, then point + DHCP DNS options at that address. +- Keep a small static/reserved range per subnet so replacements do not collide + with dynamic leases. + +## Cabling And Wi-Fi + +- Prefer wired AP backhaul over mesh when you can run Ethernet. +- Use a PoE switch for APs and cameras if the budget allows it. +- Label both ends of each cable and keep a simple port map. +- Put the gateway, switch, DNS server, and NAS on UPS power if outages are common. + +## Examples + +### Beginner Upgrade + +Goal: Keep the ISP router but stabilize a small lab. + +1. Set DHCP reservations for NAS, Pi, and any SSH hosts. +2. Move local names to `home.arpa`. +3. Disable duplicate DHCP servers on secondary routers or APs. +4. Wire the main AP instead of relying on wireless backhaul. + +### VLAN-Ready Plan + +Goal: Prepare for future segmentation without enabling it immediately. + +1. Choose non-overlapping /24 ranges for trusted, IoT, servers, guest, and + management. +2. Reserve .1 for the gateway and .2-.49 for infrastructure on every subnet. +3. Buy a gateway and switch that support VLANs and inter-VLAN firewall rules. +4. Document which SSIDs and switch ports will eventually map to each network. + +## Anti-Patterns + +- Double NAT without a reason or documentation. +- Using `192.168.1.0/24` when VPN access is planned. +- Dynamic addresses for NAS, Pi-hole, Home Assistant, or other service hosts. +- Consumer routers repurposed as APs while their DHCP servers are still enabled. +- Flat networks with cameras, smart plugs, laptops, and servers all sharing the + same trust boundary. 
+ +## See Also + +- Skill: `network-interface-health` +- Skill: `network-config-validation` diff --git a/skills/network-config-validation/SKILL.md b/skills/network-config-validation/SKILL.md new file mode 100644 index 00000000..75382c44 --- /dev/null +++ b/skills/network-config-validation/SKILL.md @@ -0,0 +1,210 @@ +--- +name: network-config-validation +description: Pre-deployment checks for router and switch configuration, including dangerous commands, duplicate addresses, subnet overlaps, stale references, management-plane risk, and IOS-style security hygiene. +origin: community +--- + +# Network Config Validation + +Use this skill to review network configuration before a change window or before +an automation run touches production devices. + +## When to Use + +- Reviewing Cisco IOS or IOS-XE style snippets before deployment. +- Auditing generated config from scripts or templates. +- Looking for dangerous commands, duplicate IP addresses, or subnet overlaps. +- Checking whether ACLs, route-maps, prefix-lists, or line policies are referenced + but not defined. +- Building lightweight pre-flight scripts for network automation. + +## How It Works + +Treat config validation as layered evidence, not as a complete parser. Regex +checks are useful for pre-flight warnings, but final approval still needs a +network engineer to review intent, platform syntax, and rollback steps. + +Validate in this order: + +1. Destructive commands. +2. Credential and management-plane exposure. +3. Duplicate addresses and overlapping subnets. +4. Stale references to ACLs, route-maps, prefix-lists, and interfaces. +5. Operational hygiene such as NTP, timestamps, remote logging, and banners. 
+ +## Dangerous Command Detection + +```python +import re + +DANGEROUS_PATTERNS: list[tuple[re.Pattern[str], str]] = [ + (re.compile(r"\breload\b", re.I), "reload causes downtime"), + (re.compile(r"\berase\s+(startup|nvram|flash)", re.I), "erases persistent storage"), + (re.compile(r"\bformat\b", re.I), "formats a device filesystem"), + (re.compile(r"\bno\s+router\s+(bgp|ospf|eigrp)\b", re.I), "removes a routing process"), + (re.compile(r"\bno\s+interface\s+\S+", re.I), "removes interface configuration"), + (re.compile(r"\baaa\s+new-model\b", re.I), "changes authentication behavior"), + (re.compile(r"\bcrypto\s+key\s+(zeroize|generate)\b", re.I), "changes device SSH keys"), +] + +def find_dangerous_commands(lines: list[str]) -> list[dict[str, str | int]]: + findings = [] + for line_number, line in enumerate(lines, start=1): + stripped = line.strip() + for pattern, reason in DANGEROUS_PATTERNS: + if pattern.search(stripped): + findings.append({ + "line": line_number, + "command": stripped, + "reason": reason, + }) + return findings +``` + +## Duplicate IPs And Subnet Overlaps + +```python +import ipaddress +import re +from collections import Counter + +IP_ADDRESS_RE = re.compile( + r"^\s*ip address\s+" + r"(?P<ip>\d{1,3}(?:\.\d{1,3}){3})\s+" + r"(?P<mask>\d{1,3}(?:\.\d{1,3}){3})\b", + re.I | re.M, +) + +def extract_interfaces(config: str) -> list[dict[str, str]]: + results = [] + current = None + for line in config.splitlines(): + if line.startswith("interface "): + current = line.split(maxsplit=1)[1] + continue + match = IP_ADDRESS_RE.match(line) + if current and match: + ip = match.group("ip") + mask = match.group("mask") + network = ipaddress.ip_interface(f"{ip}/{mask}").network + results.append({"interface": current, "ip": ip, "network": str(network)}) + return results + +def find_duplicate_ips(config: str) -> list[str]: + ips = [entry["ip"] for entry in extract_interfaces(config)] + counts = Counter(ips) + return sorted(ip for ip, count in counts.items() if count > 1) + 
+def find_subnet_overlaps(config: str) -> list[tuple[str, str]]: + networks = [ipaddress.ip_network(entry["network"]) for entry in extract_interfaces(config)] + overlaps = [] + for index, left in enumerate(networks): + for right in networks[index + 1:]: + if left.overlaps(right): + overlaps.append((str(left), str(right))) + return overlaps +``` + +## Management-Plane Checks + +Parse VTY blocks by section so access-class checks do not spill across unrelated +lines. + +```python +import re + +def iter_blocks(config: str, starts_with: str) -> list[str]: + blocks = [] + current: list[str] = [] + for line in config.splitlines(): + if line.startswith(starts_with): + if current: + blocks.append("\n".join(current)) + current = [line] + continue + if current: + if line and not line.startswith(" "): + blocks.append("\n".join(current)) + current = [] + else: + current.append(line) + if current: + blocks.append("\n".join(current)) + return blocks + +def check_vty_blocks(config: str) -> list[str]: + issues = [] + for block in iter_blocks(config, "line vty"): + if re.search(r"transport\s+input\s+.*telnet", block, re.I): + issues.append("VTY allows Telnet; require SSH only.") + if not re.search(r"\baccess-class\s+\S+\s+in\b", block, re.I): + issues.append("VTY block has no inbound access-class source restriction.") + if not re.search(r"\bexec-timeout\s+\d+\s+\d+\b", block, re.I): + issues.append("VTY block has no explicit exec-timeout.") + return issues +``` + +## Security Hygiene Checks + +```python +SECURITY_PATTERNS = [ + (re.compile(r"\bsnmp-server community\s+(public|private)\b", re.I), + "default SNMP community configured"), + (re.compile(r"\bsnmp-server community\s+\S+", re.I), + "SNMPv2 community string configured; prefer SNMPv3 authPriv"), + (re.compile(r"\bip ssh version 1\b", re.I), + "SSH version 1 enabled"), + (re.compile(r"\benable password\b", re.I), + "enable password is present; use enable secret"), + (re.compile(r"\busername\s+\S+\s+password\b", re.I), + "local 
username uses password instead of secret"), +] + +BEST_PRACTICE_PATTERNS = [ + (re.compile(r"\bntp server\b", re.I), "NTP server"), + (re.compile(r"\bservice timestamps\b", re.I), "log timestamps"), + (re.compile(r"\blogging\s+\S+", re.I), "logging destination or buffer"), + (re.compile(r"\bsnmp-server group\s+\S+\s+v3\s+priv\b", re.I), "SNMPv3 authPriv group"), + (re.compile(r"\bbanner\s+(login|motd)\b", re.I), "login banner"), +] + +def check_security(config: str) -> list[str]: + return [message for pattern, message in SECURITY_PATTERNS if pattern.search(config)] + +def check_missing_hygiene(config: str) -> list[str]: + return [ + f"Missing {description}" + for pattern, description in BEST_PRACTICE_PATTERNS + if not pattern.search(config) + ] +``` + +## Examples + +### Change-Window Preflight + +1. Run dangerous-command checks on the exact snippet to be pasted. +2. Run duplicate IP and subnet overlap checks against the full candidate config. +3. Confirm every referenced ACL, route-map, and prefix-list exists. +4. Confirm rollback commands and out-of-band access before any management-plane + change. + +### Automation Preflight + +Use validation as a blocking gate before Netmiko, NAPALM, Ansible, or vendor API +automation pushes a generated config. Fail closed on dangerous commands and +credentials. Warn on best-practice gaps that are outside the change scope. + +## Anti-Patterns + +- Treating regex validation as a device parser. +- Applying generated config without a dry-run diff. +- Recommending SNMPv2 community strings as a monitoring requirement. +- Checking VTY blocks with regex that can accidentally span unrelated sections. +- Testing firewall behavior by disabling ACLs instead of reading counters/logs. 
+ +## See Also + +- Agent: `network-config-reviewer` +- Agent: `network-troubleshooter` +- Skill: `network-interface-health` diff --git a/skills/network-interface-health/SKILL.md b/skills/network-interface-health/SKILL.md new file mode 100644 index 00000000..52b2d084 --- /dev/null +++ b/skills/network-interface-health/SKILL.md @@ -0,0 +1,152 @@ +--- +name: network-interface-health +description: Diagnose interface errors, drops, CRCs, duplex mismatches, flapping, speed negotiation issues, and counter trends on routers, switches, and Linux hosts. +origin: community +--- + +# Network Interface Health + +Use this skill when a network symptom might be caused by a physical link, switch +port, cable, transceiver, duplex setting, or congested interface. + +## When to Use + +- A host or VLAN has packet loss, latency spikes, or intermittent reachability. +- A switch or router interface shows CRCs, runts, giants, drops, resets, or flaps. +- You need to compare both ends of a link before replacing hardware. +- A change window needs before/after interface counter evidence. +- Monitoring reports rising `ifInErrors`, `ifOutErrors`, or `ifOutDiscards`. + +## How It Works + +Interface counters are evidence, but the trend matters more than the absolute +number. Capture a baseline, wait a measurement interval, capture again, then +compare increments. 
+ +```text +show interfaces <interface> +show interfaces status +show logging | include <interface>|changed state|line protocol +``` + +On Linux hosts: + +```text +ip -s link show <interface> +ethtool <interface> +ethtool -S <interface> +``` + +## Counter Reference + +| Counter | Meaning | Common cause | +| --- | --- | --- | +| CRC | Received frame checksum failed | Bad cable, dirty fiber, bad optic, duplex mismatch | +| input errors | Aggregate receive-side errors | Check sub-counters before concluding | +| runts | Frames below minimum Ethernet size | Duplex mismatch, collision domain, faulty NIC | +| giants | Frames larger than expected MTU | MTU mismatch or jumbo-frame boundary | +| input drops | Device could not accept inbound packets | Burst, oversubscription, CPU path, queue pressure | +| output drops | Egress queue discarded packets | Congestion, QoS policy, undersized uplink | +| resets | Interface hardware reset | Flapping, keepalive, driver, optic, power | +| collisions | Ethernet collision counter | Half duplex or negotiation mismatch | + +## Diagnosis Flow + +### CRCs Or Input Errors + +1. Confirm counters are incrementing, not just historical. +2. Check both ends of the link. Receive-side errors usually point to the signal + arriving on that side, not necessarily the port reporting the error. +3. Replace patch cable or clean/replace fiber and optics. +4. Confirm speed/duplex settings match on both sides. +5. Check logs for flap events around the same timestamp. + +### Drops + +1. Separate input drops from output drops. +2. Compare interface rate against capacity. +3. Check QoS policy, queue counters, and whether the link is an oversubscribed + uplink. +4. Treat queue tuning as secondary. First prove whether the link is congested. + +### Duplex And Speed + +Prefer auto-negotiation on modern Ethernet links when both sides support it. If +one side must be fixed, configure both sides explicitly and document why. Never +mix fixed speed/duplex on one side with auto on the other. 
+ +```text +show interfaces | include duplex|speed +``` + +## Safe Parser Example + +Slice each interface block from one header to the next. Do not use an arbitrary +character window; large interface blocks can cause counters to be missed or +assigned to the wrong port. + +```python +import re +from typing import Any + +HEADER_RE = re.compile( + r"^(?P<name>\S+) is (?P<status>(?:administratively )?down|up), " + r"line protocol is (?P<protocol>up|down)", + re.I | re.M, +) +ERROR_RE = re.compile(r"(?P<input>\d+) input errors, (?P<crc>\d+) CRC", re.I) +DROP_RE = re.compile(r"(?P<output>\d+) output errors", re.I) +DUPLEX_RE = re.compile(r"(?P<duplex>Full|Half|Auto)-duplex,\s+(?P<speed>[^,]+)", re.I) + +def parse_show_interfaces(raw: str) -> list[dict[str, Any]]: + headers = list(HEADER_RE.finditer(raw)) + interfaces = [] + for index, header in enumerate(headers): + end = headers[index + 1].start() if index + 1 < len(headers) else len(raw) + block = raw[header.start():end] + errors = ERROR_RE.search(block) + drops = DROP_RE.search(block) + duplex = DUPLEX_RE.search(block) + interfaces.append({ + "name": header.group("name"), + "status": header.group("status"), + "protocol": header.group("protocol"), + "duplex": duplex.group("duplex") if duplex else "unknown", + "speed": duplex.group("speed").strip() if duplex else "unknown", + "input_errors": int(errors.group("input")) if errors else 0, + "crc_errors": int(errors.group("crc")) if errors else 0, + "output_errors": int(drops.group("output")) if drops else 0, + }) + return interfaces +``` + +## Examples + +### CRCs On One Switch Port + +1. Capture counters on the local port. +2. Capture counters on the connected remote port. +3. Replace the cable or optic before changing routing or firewall rules. +4. Clear counters only after recording the baseline. +5. Recheck after a fixed interval. + +### Internet Slow But LAN Is Fine + +1. Check WAN interface drops/errors. +2. Check LAN uplink utilization and output drops. +3. 
Check gateway CPU if the WAN link is clean but throughput is still low. +4. Compare wired and wireless tests before blaming upstream service. + +## Anti-Patterns + +- Clearing counters before saving a baseline. +- Looking at only one side of a link. +- Assuming all historical CRCs are active problems without a time window. +- Mixing auto-negotiation on one side with fixed speed/duplex on the other. +- Treating output drops as a cable problem before checking congestion. + +## See Also + +- Agent: `network-troubleshooter` +- Skill: `network-config-validation` +- Skill: `homelab-network-setup`