{"corpus_id":6092484,"paper_sha":"c3a39721e079eb4baa3d286b738bf822007c20d7","doi":"10.1145/2976749.2978370","arxiv_id":null,"pmid":null,"pmcid":null,"mag_id":2532962075,"dblp_id":"conf/ccs/FengZXCTY16","acl_id":null,"title":"Scalable Graph-based Bug Search for Firmware Images","year":2016,"publication_date":"2016-10-24","venue":"Conference on Computer and Communications Security","journal":{"name":"Proceedings of the 2016 ACM SIGSAC Conference on Computer and Communications Security","pages":null,"volume":null},"journal_issn":null,"journal_title":null,"publication_types":["Book","JournalArticle","Conference"],"pubmed_pub_types":null,"s2_fields_of_study":["Computer Science","Engineering"],"reference_count":67,"citation_count":448,"influential_citation_count":77,"is_open_access":false,"arxiv_categories":null,"arxiv_license":null,"arxiv_journal_ref":null,"mesh_headings":null,"chemicals":null,"comments_corrections":null,"source_flags":1,"s2_open_access_pdf_url":null,"s2_open_access_landing_url":null,"s2_open_access_license":null,"s2_open_access_status":null,"pmc_open_access_pdf_url":null,"pmc_open_access_landing_url":null,"pmc_open_access_license":null,"pmc_open_access_status":null,"unpaywall_open_access_pdf_url":null,"unpaywall_open_access_landing_url":null,"unpaywall_open_access_license":null,"unpaywall_open_access_status":null,"abstract":"Because of rampant security breaches in IoT devices, searching vulnerabilities in massive IoT ecosystems is more crucial than ever. Recent studies have demonstrated that control-flow graph (CFG) based bug search techniques can be effective and accurate in IoT devices across different architectures. However, these CFG-based bug search approaches are far from being scalable to handle an enormous amount of IoT devices in the wild, due to their expensive graph matching overhead. Inspired by rich experience in image and video search, we propose a new bug search scheme which addresses the scalability challenge in existing cross-platform bug search techniques and further improves search accuracy. Unlike existing techniques that directly conduct searches based upon raw features (CFGs) from the binary code, we convert the CFGs into high-level numeric feature vectors. Compared with the CFG feature, high-level numeric feature vectors are more robust to code variation across different architectures, and can easily achieve realtime search by using state-of-the-art hashing techniques. We have implemented a bug search engine, Genius, and compared it with state-of-art bug search approaches. Experimental results show that Genius outperforms baseline approaches for various query loads in terms of speed and accuracy. We also evaluated Genius on a real-world dataset of 33,045 devices which was collected from public sources and our system. The experiment showed that Genius can finish a search within 1 second on average when performed over 8,126 firmware images of 420,558,702 functions. By only looking at the top 50 candidates in the search result, we found 38 potentially vulnerable firmware images across 5 vendors, and confirmed 23 of them by our manual analysis. We also found that it took only 0.1 seconds on average to finish searching for all 154 vulnerabilities in two latest commercial firmware images from D-LINK. 103 of them are potentially vulnerable in these images, and 16 of them were confirmed.","claims":[{"public_id":"cl_7e5b31a074fbe47d98bfef1f73d00f26","status":"active","text":"Genius converts control-flow graphs from binary code into high-level numeric feature vectors for scalable cross-platform bug search in firmware images.","confidence":0.96,"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_7e5b31a074fbe47d98bfef1f73d00f26"},{"public_id":"cl_68b387502d349a1abdd196b957140ecf","status":"active","text":"Genius outperforms baseline bug search approaches across various query loads in both speed and accuracy.","confidence":0.93,"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_68b387502d349a1abdd196b957140ecf"},{"public_id":"cl_6ca6f4f108f7fcd301ea47db4b4da28f","status":"active","text":"High-level numeric feature vectors are described as more robust to code variation across different architectures than raw control-flow graph features and compatible with realtime hashing-based search.","confidence":0.9,"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_6ca6f4f108f7fcd301ea47db4b4da28f"},{"public_id":"cl_cea61cf7836ee17ef7249baf49e046de","status":"active","text":"Inspecting the top 50 search candidates identified 38 potentially vulnerable firmware images across 5 vendors, with 23 confirmed by manual analysis.","confidence":0.97,"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_cea61cf7836ee17ef7249baf49e046de"},{"public_id":"cl_1e5e96d8583b63a07339048a8a257478","status":"active","text":"On a real-world dataset of 33,045 devices, Genius averaged under 1 second per search over 8,126 firmware images containing 420,558,702 functions.","confidence":0.98,"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_1e5e96d8583b63a07339048a8a257478"},{"public_id":"cl_732e40ff238b6b25b540683bc1ea404d","status":"active","text":"Searching for all 154 vulnerabilities in two latest commercial firmware images from D-LINK took 0.1 seconds on average, identifying 103 potentially vulnerable cases and confirming 16 of them.","confidence":0.97,"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_732e40ff238b6b25b540683bc1ea404d"}],"concepts":[{"public_id":"co_0a1c59c7bbfbd7db246db9b2a3a39998","status":"active","name":"query loads","description":"Different search workload levels used to evaluate bug search speed and accuracy.","types":["evaluation condition"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_0a1c59c7bbfbd7db246db9b2a3a39998"},{"public_id":"co_0b89d41748e2ec6b4d4ce1daed30efad","status":"active","name":"top 50 candidates","description":"The highest-ranked search results examined to identify potentially vulnerable firmware images.","types":["evaluation output"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_0b89d41748e2ec6b4d4ce1daed30efad"},{"public_id":"co_3f60d1ff609fac4d2bf0b309595937b9","status":"active","name":"154 vulnerabilities","description":"A set of known vulnerabilities searched for in two commercial D-LINK firmware images.","types":["query set"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_3f60d1ff609fac4d2bf0b309595937b9"},{"public_id":"co_4f7434db8a267b9e6cf722288c7be5b6","status":"active","name":"Genius","description":"A bug search engine implemented to search for vulnerabilities in firmware images using graph-derived numeric features.","types":["system"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_4f7434db8a267b9e6cf722288c7be5b6"},{"public_id":"co_54e27e05eea0f1afa8dbe3791def69a2","status":"active","name":"real-world dataset of 33,045 devices","description":"A collection of IoT devices gathered from public sources and the authors' system for evaluating firmware bug search.","types":["dataset"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_54e27e05eea0f1afa8dbe3791def69a2"},{"public_id":"co_8519a24be0bf837f413699c98b2e296b","status":"active","name":"potentially vulnerable firmware images","description":"Firmware images flagged by the search results as likely containing known vulnerabilities.","types":["result category"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_8519a24be0bf837f413699c98b2e296b"},{"public_id":"co_be84451eb753af8a2559cf3569c0605b","status":"active","name":"state-of-the-art hashing techniques","description":"Hashing methods used to support realtime search over numeric feature vectors.","types":["method"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_be84451eb753af8a2559cf3569c0605b"},{"public_id":"co_d3c52d5f6ac143cc9531efa3d73b9bac","status":"active","name":"firmware images","description":"Binary software images from IoT devices that are searched for vulnerable functions.","types":["data object"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_d3c52d5f6ac143cc9531efa3d73b9bac"},{"public_id":"co_e41f082b02d6a80943c27420df63ee5e","status":"active","name":"code variation across different architectures","description":"Differences in binary code caused by compiling or running software across multiple hardware architectures.","types":["challenge"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_e41f082b02d6a80943c27420df63ee5e"},{"public_id":"co_e45111da711d69648727adfacfd3ac63","status":"active","name":"high-level numeric feature vectors","description":"Numeric vector representations converted from control-flow graphs for firmware bug search.","types":["feature representation"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_e45111da711d69648727adfacfd3ac63"},{"public_id":"co_f0e6b79ec50ad3432ccbe38108e73b25","status":"active","name":"control-flow graphs","description":"Graph representations of binary code structure used as raw features in existing bug search techniques.","types":["representation"],"aliases":["CFGs"],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_f0e6b79ec50ad3432ccbe38108e73b25"},{"public_id":"co_f60b3ae22e3b9a32fa14eda81309262e","status":"active","name":"baseline bug search approaches","description":"Existing state-of-the-art bug search methods used as comparison points for evaluating Genius.","types":["baseline"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_f60b3ae22e3b9a32fa14eda81309262e"},{"public_id":"co_f92eac7b598e7b70dc341d63de4e846f","status":"active","name":"commercial firmware images from D-LINK","description":"Two latest D-LINK firmware images used as a commercial evaluation target.","types":["evaluation target"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_f92eac7b598e7b70dc341d63de4e846f"},{"public_id":"co_fb74174826f25536fca0084acd8a82d4","status":"active","name":"manual analysis","description":"Human verification used to confirm whether flagged firmware images were vulnerable.","types":["validation method"],"aliases":[],"contributors":[{"id":136,"public_id":"3c2apqe3ut","public_label":"Anonymous (3c2apqe3ut)","roles":["extraction"],"url":"https://sah.borca.ai/u/3c2apqe3ut"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_fb74174826f25536fca0084acd8a82d4"}],"external_ids":{"DOI":"10.1145/2976749.2978370","ArXiv":null,"PubMed":null,"PubMedCentral":null,"MAG":2532962075,"DBLP":"conf/ccs/FengZXCTY16","ACL":null},"open_access":{"is_open_access":false,"pdf_url":null,"landing_url":"https://sah.borca.ai/papers/6092484","source":null,"pdf_url_source":null,"license":null,"reason":"pdf_url_not_indexed"},"reference_availability":{"status":"available","references_indexed":true,"full_text_available":false,"full_text_source":null,"count_basis":"semantic_scholar_metadata","extraction_status":"not_applicable","reason":null},"source":{"provider":"episteme2","base_corpus":"semantic_scholar_dump","freshness_mode":"unknown","basis":["semantic_scholar_metadata","postgres_metadata"],"limits":["paper metadata is based on indexed upstream scholarly datasets","claims and concepts are available only for extracted papers","absence of claims or concepts means no extracted graph data is available in this response"],"status":"available","degraded":false,"degraded_reasons":[],"diagnostics":{"status":"available","degraded":false,"degraded_reasons":[],"metadata_status":"available","graph_status":"available","abstract_status":"available"},"source_flags":1},"paper_id":630749,"paper_uid":"26dc0421-c25d-457b-acd1-af8fddaf0dcb","canonical_identity":{"paper_id":630749,"paper_uid":"26dc0421-c25d-457b-acd1-af8fddaf0dcb","identity_status":"available","lookup_basis":"semantic_scholar_external_id","compatibility_path":"corpus_id"},"url":"https://sah.borca.ai/papers/6092484"}