[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"doc-detail-31674":3,"doc-seo-31674":27},{"code":4,"msg":5,"data":6},0,"success",{"doc_id":7,"user_id":8,"nickname":9,"user_avatar":10,"doc_module":4,"category_id":11,"category_name":12,"doc_title":13,"doc_description":14,"file_id":15,"file_url":16,"file_type":17,"file_size":18,"view_count":4,"is_deleted":4,"is_public":19,"is_downloadable":19,"audit_status":19,"page_count":20,"language":21,"language_code":22,"table_of_contents":23,"faqs":24,"seo_title":13,"seo_description":14,"update_tm":25,"read_time":26},31674,1099513958762,"Logic","https://ap-avatar.wpscdn.com/avatar/1000023916a998db790?_k=1776737595927829259",8,"Research & Report","CICIDS-2017 Dataset Feature Analysis With Information Gain for Anomaly Detection","Feature selection serves as a core step in data preprocessing for data analytics, where high-dimensional network traffic increases computational complexity, resource usage, and analysis time. This study identifies relevant and significant features to enhance traffic anomaly detection accuracy while reducing execution time. Information Gain ranks and groups features using minimum weight criteria, then evaluates multiple classifiers—Random Forest, Bayes Net, Random Tree, Naive Bayes, and J48—on the CICIDS-2017 dataset. Results confirm that the selected feature count substantially affects accuracy and runtime.","cbCaijjo7lLP53IR","https://ap.wps.com/l/cbCaijjo7lLP53IR","pdf",1046290,1,12,"English","en","# Introduction\n## Problem: high dimensionality and computation cost\n## Feature selection and dimensionality reduction\n## Prior work and need for large-feature testing\n## Role of Information Gain and CICIDS-2017 dataset","[{\"question\":\"Why is feature selection important for anomaly detection in network traffic analytics?\",\"answer\":\"High-dimensional data raises computational complexity, increases resource usage, and prolongs execution time. Feature selection reduces dimensionality, helps interpret data, and improves predictive performance for anomaly detection.\"},{\"question\":\"How does the study use Information Gain to select features?\",\"answer\":\"Information Gain ranks and groups features using minimum weight values, selecting relevant and significant subsets for later classifier evaluation.\"},{\"question\":\"Which classifiers are tested on the CICIDS-2017 dataset, and what performance trends are reported?\",\"answer\":\"Random Forest, Bayes Net, Random Tree, Naive Bayes, and J48 are tested. The results show that the number of relevant selected features strongly influences detection accuracy and execution time, with Random Forest reaching the highest accuracy in the reported experiment and J48 achieving a very close accuracy using more features but longer runtime.\"}]",1779915621,30,{"code":4,"msg":28,"data":29},"ok",{"site_id":30,"language":22,"slug":31,"title":13,"keywords":32,"description":14,"schema_data":33,"social_meta":84,"head_meta":86,"extra_data":88,"updated_unix":25},105,"cicids-2017-dataset-feature-analysis-with-information-gain-for-anomaly-detection","",{"@graph":34,"@context":83},[35,52,66],{"@type":36,"itemListElement":37},"BreadcrumbList",[38,42,46,49],{"item":39,"name":40,"@type":41,"position":19},"https://docshare.wps.com","Home","ListItem",{"item":43,"name":44,"@type":41,"position":45},"https://docshare.wps.com/document/","Document",2,{"item":47,"name":12,"@type":41,"position":48},"https://docshare.wps.com/document/research-report/",3,{"item":50,"name":13,"@type":41,"position":51},"https://docshare.wps.com/document/cicids-2017-dataset-feature-analysis-with-information-gain-for-anomaly-detection/31674/",4,{"url":50,"name":13,"@type":53,"author":54,"headline":13,"publisher":56,"fileFormat":59,"description":14,"dateModified":60,"datePublished":60,"encodingFormat":59,"isAccessibleForFree":61,"interactionStatistic":62},"DigitalDocument",{"name":9,"@type":55},"Person",{"url":39,"name":57,"@type":58},"DocShare","Organization","application/pdf","2026-05-27",true,{"@type":63,"interactionType":64,"userInteractionCount":4},"InteractionCounter",{"@type":65},"ViewAction",{"@type":67,"mainEntity":68},"FAQPage",[69,75,79],{"name":70,"@type":71,"acceptedAnswer":72},"Why is feature selection important for anomaly detection in network traffic analytics?","Question",{"text":73,"@type":74},"High-dimensional data raises computational complexity, increases resource usage, and prolongs execution time. Feature selection reduces dimensionality, helps interpret data, and improves predictive performance for anomaly detection.","Answer",{"name":76,"@type":71,"acceptedAnswer":77},"How does the study use Information Gain to select features?",{"text":78,"@type":74},"Information Gain ranks and groups features using minimum weight values, selecting relevant and significant subsets for later classifier evaluation.",{"name":80,"@type":71,"acceptedAnswer":81},"Which classifiers are tested on the CICIDS-2017 dataset, and what performance trends are reported?",{"text":82,"@type":74},"Random Forest, Bayes Net, Random Tree, Naive Bayes, and J48 are tested. The results show that the number of relevant selected features strongly influences detection accuracy and execution time, with Random Forest reaching the highest accuracy in the reported experiment and J48 achieving a very close accuracy using more features but longer runtime.","https://schema.org",{"og:url":50,"og:type":85,"og:title":13,"og:site_name":57,"og:description":14},"article",{"robots":87,"canonical":50},"index,follow",{"doc_id":7,"site_id":30}]