使用数据集为Kddcup99的10%数据集。
运行以下代码前,需要先把下载下来的数据文件改成 txt 格式,并命名为 kddcup.data_10_percent_corrected.txt;
代码会读取该文件,并另存为 kddcup.data_10_percent_corrected.csv 文件。代码中的文件路径是相对于当前工作目录的,可以根据自己的文件位置更改(例如改成绝对路径)。代码为:
import pandas as pd

# Column names for the KDD Cup 99 records: 42 identifiers in total, the last
# one being the "label" column. The raw data file has no header row, so these
# names are supplied explicitly when the file is loaded.
col_names = [
    "duration",
    "protocol_type",
    "service",
    "flag",
    "src_bytes",
    "dst_bytes",
    "land",
    "wrong_fragment",
    "urgent",
    "hot",
    "num_failed_logins",
    "logged_in",
    "num_compromised",
    "root_shell",
    "su_attempted",
    "num_root",
    "num_file_creations",
    "num_shells",
    "num_access_files",
    "num_outbound_cmds",
    "is_host_login",
    "is_guest_login",
    "count",
    "srv_count",
    "serror_rate",
    "srv_serror_rate",
    "rerror_rate",
    "srv_rerror_rate",
    "same_srv_rate",
    "diff_srv_rate",
    "srv_diff_host_rate",
    "dst_host_count",
    "dst_host_srv_count",
    "dst_host_same_srv_rate",
    "dst_host_diff_srv_rate",
    "dst_host_same_src_port_rate",
    "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate",
    "dst_host_srv_serror_rate",
    "dst_host_rerror_rate",
    "dst_host_srv_rerror_rate",
    "label",
]

# Only touch the file system when executed as a script, so that importing this
# module (e.g. to reuse ``col_names``) does not require the data file.
if __name__ == "__main__":
    # The input is comma-separated with no header line; read_csv defaults to
    # sep=",", which is equivalent to the former read_table(..., sep=",").
    data = pd.read_csv(
        "kddcup.data_10_percent_corrected.txt",
        header=None,
        names=col_names,
    )

    # Preview the first 10 rows.
    print(data.head(10))

    # Save a copy as a CSV file. With the default settings the DataFrame index
    # is written as an extra, unnamed first column; pass index=False to
    # to_csv if that column is not wanted.
    data.to_csv("kddcup.data_10_percent_corrected.csv")
