{"corpus_id":1475794,"paper_sha":"1e384bcc83530b5a8cf84e59cb16d3343f40a5be","doi":"10.1186/1471-2105-10-S1-S42","arxiv_id":null,"pmid":19208144,"pmcid":"2648739","mag_id":2014216829,"dblp_id":"journals/bmcbi/HanZF09","acl_id":null,"title":"Predicting disordered regions in proteins using the profiles of amino acid indices","year":2009,"publication_date":"2009-01-30","venue":"BMC Bioinformatics","journal":{"name":"BMC Bioinformatics","pages":"S42 - S42","volume":"10"},"journal_issn":null,"journal_title":null,"publication_types":["JournalArticle"],"pubmed_pub_types":["Journal Article"],"s2_fields_of_study":["Biology","Medicine","Computer Science"],"reference_count":41,"citation_count":25,"influential_citation_count":2,"is_open_access":true,"arxiv_categories":null,"arxiv_license":null,"arxiv_journal_ref":null,"mesh_headings":[{"d":"Algorithms","mj":true,"ui":"D000465"},{"d":"Amino Acids","mj":false,"qs":[{"q":"chemistry","mj":true,"ui":"Q000737"}],"ui":"D000596"},{"d":"Artificial Intelligence","mj":true,"ui":"D001185"},{"d":"Databases, Protein","mj":false,"ui":"D030562"},{"d":"Proteins","mj":false,"qs":[{"q":"chemistry","mj":true,"ui":"Q000737"}],"ui":"D011506"},{"d":"Sequence Analysis, Protein","mj":false,"qs":[{"q":"methods","mj":false,"ui":"Q000379"}],"ui":"D020539"}],"chemicals":[{"n":"Amino 
Acids","ui":"D000596","reg":"0"},{"n":"Proteins","ui":"D011506","reg":"0"}],"comments_corrections":null,"source_flags":5,"s2_open_access_pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-10-S1-S42","s2_open_access_landing_url":"https://www.semanticscholar.org/paper/1e384bcc83530b5a8cf84e59cb16d3343f40a5be","s2_open_access_license":"CCBY","s2_open_access_status":"GOLD","pmc_open_access_pdf_url":null,"pmc_open_access_landing_url":null,"pmc_open_access_license":null,"pmc_open_access_status":null,"unpaywall_open_access_pdf_url":null,"unpaywall_open_access_landing_url":null,"unpaywall_open_access_license":null,"unpaywall_open_access_status":null,"abstract":"Background: Intrinsically unstructured or disordered proteins are common and functionally important. Prediction of disordered regions in proteins can provide useful information for understanding protein function and for high-throughput determination of protein structures. Results: In this paper, algorithms are presented to predict long and short disordered regions in proteins, namely the long disordered region prediction algorithm DRaai-L and the short disordered region prediction algorithm DRaai-S. These algorithms are developed based on the Random Forest machine learning model and the profiles of amino acid indices representing various physiochemical and biochemical properties of the 20 amino acids. Conclusion: Experiments on DisProt3.6 and CASP7 demonstrate that some sets of the amino acid indices have strong association with the ordered and disordered status of residues. Our algorithms based on the profiles of these amino acid indices as input features to predict disordered regions in proteins outperform that based on amino acid composition and reduced amino acid composition, and also outperform many existing algorithms. 
Our studies suggest that the profiles of amino acid indices combined with the Random Forest learning model is an important complementary method for pinpointing disordered regions in proteins.","claims":[{"public_id":"cl_7a555a44ea121cea371d71019f5b49db","status":"active","text":"DRaai-L and DRaai-S are Random Forest-based algorithms that use profiles of amino acid indices to predict long and short disordered regions in proteins.","confidence":0.98,"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_7a555a44ea121cea371d71019f5b49db"},{"public_id":"cl_5b454fa121ce5b6f216bdaba92143e42","status":"active","text":"DRaai-L and DRaai-S outperform methods based on amino acid composition and reduced amino acid composition, as well as many existing algorithms.","confidence":0.95,"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_5b454fa121ce5b6f216bdaba92143e42"},{"public_id":"cl_ee20d80381fd0e5d0bbb5f46ee656d33","status":"active","text":"Experiments on DisProt3.6 and CASP7 show that some sets of amino acid indices are strongly associated with the ordered and disordered status of 
residues.","confidence":0.92,"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_ee20d80381fd0e5d0bbb5f46ee656d33"},{"public_id":"cl_09f0f66adf195835ce8099f289f04710","status":"active","text":"Profiles of amino acid indices combined with the Random Forest learning model form an important complementary method for pinpointing disordered regions in proteins.","confidence":0.89,"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_09f0f66adf195835ce8099f289f04710"}],"concepts":[{"public_id":"co_0250ba298d943296f68ff4fee58561c7","status":"active","name":"amino acid composition","description":"A baseline representation that encodes the overall frequency of amino acids in a protein sequence.","types":["baseline"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale 
(322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_0250ba298d943296f68ff4fee58561c7"},{"public_id":"co_167a7adeeb2c0e4eb3125106011ae38f","status":"active","name":"Random Forest machine learning model","description":"A machine learning model used here as the classifier for disorder-region prediction.","types":["model"],"aliases":["Random Forest"],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_167a7adeeb2c0e4eb3125106011ae38f"},{"public_id":"co_2faa7254a1c268d6413f95931d46adcd","status":"active","name":"amino acid indices","description":"Numeric indices that encode physicochemical and biochemical properties of the 20 amino acids.","types":["feature set"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_2faa7254a1c268d6413f95931d46adcd"},{"public_id":"co_53387a46bb1b55469bccfb79c9fd9e18","status":"active","name":"DRaai-L","description":"A Random Forest-based algorithm designed to predict long disordered regions in proteins.","types":["method"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple 
(x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_53387a46bb1b55469bccfb79c9fd9e18"},{"public_id":"co_58a98e55c41a61bf855741ac33bd0635","status":"active","name":"DRaai-S","description":"A Random Forest-based algorithm designed to predict short disordered regions in proteins.","types":["method"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_58a98e55c41a61bf855741ac33bd0635"},{"public_id":"co_7252c46c2c5e2d89ab286fd4d617a82a","status":"active","name":"reduced amino acid composition","description":"A reduced baseline representation that groups amino acids into fewer categories before computing composition.","types":["baseline"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale 
(322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_7252c46c2c5e2d89ab286fd4d617a82a"},{"public_id":"co_7734d4a2dd9beadae9ca053b31dbfb12","status":"active","name":"disordered regions in proteins","description":"Protein regions lacking a stable ordered structure, used as the target of prediction in this work.","types":["target"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_7734d4a2dd9beadae9ca053b31dbfb12"},{"public_id":"co_9c298bf46726b0aef6603c0dc3c760fd","status":"active","name":"short disordered regions","description":"Protein segments that are predicted or experimentally annotated as short disordered regions.","types":["region type"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_9c298bf46726b0aef6603c0dc3c760fd"},{"public_id":"co_a2c14841009eac293cb9b8ef5355feec","status":"active","name":"DisProt3.6","description":"The DisProt 3.6 benchmark dataset used for evaluating disorder prediction methods.","types":["dataset"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple 
(x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_a2c14841009eac293cb9b8ef5355feec"},{"public_id":"co_ad8281d4baa75ccca986f3ed2a72a1de","status":"active","name":"profiles of amino acid indices","description":"Feature vectors built from amino acid index profiles representing physicochemical and biochemical properties of residues.","types":["feature representation"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_ad8281d4baa75ccca986f3ed2a72a1de"},{"public_id":"co_c0dc24d045ca82aa5520bb7a6bf7abf9","status":"active","name":"long disordered regions","description":"Protein segments that are predicted or experimentally annotated as long disordered regions.","types":["region type"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale 
(322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_c0dc24d045ca82aa5520bb7a6bf7abf9"},{"public_id":"co_e9c09b7be92d132d0be3334c9830e8fd","status":"active","name":"ordered and disordered status of residues","description":"The ordered or disordered annotation assigned to protein residues in the evaluation setting.","types":["label"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_e9c09b7be92d132d0be3334c9830e8fd"},{"public_id":"co_ea9ed223b34b50f88d471303fa942f5f","status":"active","name":"CASP7","description":"The CASP7 benchmark dataset used for evaluating disorder prediction methods.","types":["dataset"],"aliases":[],"contributors":[{"id":391,"public_id":"x53qfq3ny9","public_label":"kafkapple (x53qfq3ny9)","roles":["extraction"],"url":"https://sah.borca.ai/u/x53qfq3ny9"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale 
(322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_ea9ed223b34b50f88d471303fa942f5f"}],"external_ids":{"DOI":"10.1186/1471-2105-10-S1-S42","ArXiv":null,"PubMed":19208144,"PubMedCentral":"2648739","MAG":2014216829,"DBLP":"journals/bmcbi/HanZF09","ACL":null},"open_access":{"is_open_access":true,"pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-10-S1-S42","landing_url":"https://www.semanticscholar.org/paper/1e384bcc83530b5a8cf84e59cb16d3343f40a5be","source":"semantic_scholar","pdf_url_source":"semantic_scholar_open_access_pdf","license":"CCBY","status":"GOLD","reason":null},"reference_availability":{"status":"available","references_indexed":true,"full_text_available":false,"full_text_source":null,"count_basis":"semantic_scholar_metadata","extraction_status":"not_applicable","reason":null},"source":{"provider":"episteme2","base_corpus":"semantic_scholar_dump","freshness_mode":"unknown","basis":["semantic_scholar_metadata","postgres_metadata"],"limits":["paper metadata is based on indexed upstream scholarly datasets","claims and concepts are available only for extracted papers","absence of claims or concepts means no extracted graph data is available in this response"],"status":"available","degraded":false,"degraded_reasons":[],"diagnostics":{"status":"available","degraded":false,"degraded_reasons":[],"metadata_status":"available","graph_status":"available","abstract_status":"available"},"source_flags":5},"paper_id":631334,"paper_uid":"cf231276-6f19-417d-ad95-8f7a087f48e0","canonical_identity":{"paper_id":631334,"paper_uid":"cf231276-6f19-417d-ad95-8f7a087f48e0","identity_status":"available","lookup_basis":"semantic_scholar_external_id","compatibility_path":"corpus_id"},"url":"https://sah.borca.ai/papers/1475794"}