{"corpus_id":19123459,"paper_sha":"3b7d61f8e7158fdcd126a7c2e814fab04578cf1e","doi":null,"arxiv_id":"1805.01676","pmid":null,"pmcid":null,"mag_id":2964147491,"dblp_id":"conf/lrec/HadiwinotoN18","acl_id":"L18-1003","title":"Upping the Ante: Towards a Better Benchmark for Chinese-to-English Machine Translation","year":2018,"publication_date":"2018-05-04","venue":"International Conference on Language Resources and Evaluation","journal":{"name":"ArXiv","pages":null,"volume":"abs/1805.01676"},"journal_issn":null,"journal_title":null,"publication_types":["JournalArticle"],"pubmed_pub_types":null,"s2_fields_of_study":["Linguistics","Computer Science"],"reference_count":20,"citation_count":4,"influential_citation_count":0,"is_open_access":false,"arxiv_categories":["cs.CL"],"arxiv_license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","arxiv_journal_ref":null,"mesh_headings":null,"chemicals":null,"comments_corrections":null,"source_flags":1,"s2_open_access_pdf_url":null,"s2_open_access_landing_url":null,"s2_open_access_license":null,"s2_open_access_status":null,"pmc_open_access_pdf_url":null,"pmc_open_access_landing_url":null,"pmc_open_access_license":null,"pmc_open_access_status":null,"unpaywall_open_access_pdf_url":null,"unpaywall_open_access_landing_url":null,"unpaywall_open_access_license":null,"unpaywall_open_access_status":null,"abstract":"There are many machine translation (MT) papers that propose novel approaches and show improvements over their self-defined baselines. The experimental setting in each paper often differs from one another. As such, it is hard to determine if a proposed approach is really useful and advances the state of the art. Chinese-to-English translation is a common translation direction in MT papers, although there is not one widely accepted experimental setting in Chinese-to-English MT. Our goal in this paper is to propose a benchmark in evaluation setup for Chinese-to-English machine translation, such that the effectiveness of a new proposed MT approach can be directly compared to previous approaches. Towards this end, we also built a highly competitive state-of-the-art MT system trained on a large-scale training set. Our system outperforms reported results on NIST OpenMT test sets in almost all papers published in major conferences and journals in computational linguistics and artificial intelligence in the past 11 years. We argue that a standardized benchmark on data and performance is important for meaningful comparison.","claims":[{"public_id":"cl_750cb0a015fee1b9ae5e0313f7bcbb27","status":"active","text":"A benchmark evaluation setup for Chinese-to-English machine translation is proposed so new approaches can be directly compared with prior work.","confidence":0.97,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_750cb0a015fee1b9ae5e0313f7bcbb27"},{"public_id":"cl_5946dc1546cede9504cf0eeaf9f5298a","status":"active","text":"A highly competitive state-of-the-art machine translation system is built using a large-scale training set.","confidence":0.93,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_5946dc1546cede9504cf0eeaf9f5298a"},{"public_id":"cl_a161de2c7f8cde45f45113d3bd8054d3","status":"active","text":"A standardized benchmark on data and performance is important for meaningful comparison of Chinese-to-English machine translation approaches.","confidence":0.89,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_a161de2c7f8cde45f45113d3bd8054d3"},{"public_id":"cl_cd1037a36d06919ac581cbbc27c5dd5b","status":"active","text":"The system outperforms reported results on NIST OpenMT test sets in almost all papers published in major computational linguistics and artificial intelligence venues over the past 11 years.","confidence":0.96,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_cd1037a36d06919ac581cbbc27c5dd5b"}],"concepts":[{"public_id":"co_456886bdf15dd1f3c42af21e84b1efa5","status":"active","name":"Chinese-to-English machine translation","description":"Machine translation from Chinese source text into English target text.","types":["task"],"aliases":["Chinese-English machine translation","C2E MT"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_456886bdf15dd1f3c42af21e84b1efa5"},{"public_id":"co_53beaf48d91b1506633cb2962d420de7","status":"active","name":"benchmark","description":"A standardized evaluation setup used to compare machine translation approaches under the same conditions.","types":["evaluation setting"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_53beaf48d91b1506633cb2962d420de7"},{"public_id":"co_689b165503b563f6fd254d68d0975c63","status":"active","name":"state-of-the-art MT system","description":"A machine translation system presented as a top-performing system in the reported experiments.","types":["system"],"aliases":["SOTA MT system"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_689b165503b563f6fd254d68d0975c63"},{"public_id":"co_717c1c9a319e7ae9a3052d6fddfe81e8","status":"active","name":"major conferences and journals in computational linguistics and artificial intelligence","description":"The set of leading publication venues referenced as the source of prior reported results.","types":["publication venues"],"aliases":["major CL and AI venues"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_717c1c9a319e7ae9a3052d6fddfe81e8"},{"public_id":"co_84fb3a99e1f8b3e4a0d34ba2678e794c","status":"active","name":"NIST OpenMT test sets","description":"Standard test sets from the NIST Open Machine Translation evaluation campaign used to assess translation quality.","types":["evaluation dataset"],"aliases":["OpenMT test sets"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_84fb3a99e1f8b3e4a0d34ba2678e794c"},{"public_id":"co_aae50b3825349e3113b8acc998400406","status":"active","name":"large-scale training set","description":"A large collection of parallel or training data used to train the machine translation system.","types":["dataset"],"aliases":["large training set"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_aae50b3825349e3113b8acc998400406"},{"public_id":"co_d70ef6fd1d43a41ad774754eb1b83142","status":"active","name":"reported results","description":"Previously published translation results used as comparison points in the benchmark discussion.","types":["results"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_d70ef6fd1d43a41ad774754eb1b83142"},{"public_id":"co_e993578e1de802d3353d0dc285466594","status":"active","name":"state of the art","description":"The highest reported performance level among compared machine translation systems.","types":["performance level"],"aliases":["SOTA"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_e993578e1de802d3353d0dc285466594"},{"public_id":"co_fa42cb0cbb1e13fb6daf57d535375b58","status":"active","name":"experimental setting","description":"The overall evaluation configuration, including data and measurement conditions, used in an MT paper.","types":["evaluation setting"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_fa42cb0cbb1e13fb6daf57d535375b58"}],"external_ids":{"DOI":null,"ArXiv":"1805.01676","PubMed":null,"PubMedCentral":null,"MAG":2964147491,"DBLP":"conf/lrec/HadiwinotoN18","ACL":"L18-1003"},"open_access":{"is_open_access":true,"pdf_url":"https://arxiv.org/pdf/1805.01676","landing_url":"https://arxiv.org/abs/1805.01676","source":"arxiv","pdf_url_source":"derived_arxiv","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","reason":null},"reference_availability":{"status":"available","references_indexed":true,"full_text_available":true,"full_text_source":"arxiv","count_basis":"semantic_scholar_metadata","extraction_status":"not_applicable","reason":null},"source":{"provider":"episteme2","base_corpus":"semantic_scholar_dump","freshness_mode":"unknown","basis":["semantic_scholar_metadata","postgres_metadata"],"limits":["paper metadata is based on indexed upstream scholarly datasets","claims and concepts are available only for extracted papers","absence of claims or concepts means no extracted graph data is available in this response"],"status":"available","degraded":false,"degraded_reasons":[],"diagnostics":{"status":"available","degraded":false,"degraded_reasons":[],"metadata_status":"available","graph_status":"available","abstract_status":"available"},"source_flags":1},"paper_id":632168,"paper_uid":"855bd543-3d25-45eb-baf1-55b0d84f4c07","canonical_identity":{"paper_id":632168,"paper_uid":"855bd543-3d25-45eb-baf1-55b0d84f4c07","identity_status":"available","lookup_basis":"semantic_scholar_external_id","compatibility_path":"corpus_id"},"url":"https://sah.borca.ai/papers/19123459"}