{"corpus_id":52198931,"paper_sha":"d82b7d756f3ffac2949178085bef64a426846ae9","doi":"10.1145/3209280.3229108","arxiv_id":null,"pmid":null,"pmcid":null,"mag_id":2894887592,"dblp_id":"conf/doceng/CarterDCM18","acl_id":null,"title":"FormYak: Converting forms to conversations","year":2018,"publication_date":"2018-08-28","venue":"ACM Symposium on Document Engineering","journal":{"name":"Proceedings of the ACM Symposium on Document Engineering 2018","pages":null,"volume":null},"journal_issn":null,"journal_title":null,"publication_types":["JournalArticle","Book"],"pubmed_pub_types":null,"s2_fields_of_study":["Computer Science"],"reference_count":6,"citation_count":2,"influential_citation_count":0,"is_open_access":false,"arxiv_categories":null,"arxiv_license":null,"arxiv_journal_ref":null,"mesh_headings":null,"chemicals":null,"comments_corrections":null,"source_flags":1,"s2_open_access_pdf_url":null,"s2_open_access_landing_url":null,"s2_open_access_license":null,"s2_open_access_status":null,"pmc_open_access_pdf_url":null,"pmc_open_access_landing_url":null,"pmc_open_access_license":null,"pmc_open_access_status":null,"unpaywall_open_access_pdf_url":null,"unpaywall_open_access_landing_url":null,"unpaywall_open_access_license":null,"unpaywall_open_access_status":null,"abstract":"Historically, people have interacted with companies and institutions through telephone-based dialogue systems and paper-based forms. Now, these interactions are rapidly moving to web- and phone-based chat systems. While converting traditional telephone dialogues to chat is relatively straightforward, converting forms to conversational interfaces can be challenging. In this work, we introduce methods and interfaces to enable the conversion of PDF and web-based documents that solicit user input into chat-based dialogues. Document data is first extracted to associate fields and their textual descriptions using metadata and lightweight visual analysis. The field labels, their spatial layout, and associated text are further analyzed to group related fields into natural conversational units. These correspond to questions presented to users in chat interfaces to solicit information needed to complete the original documents and downstream processes they support. This user supplied data can be inserted into the source documents and/or in downstream databases. User studies of our tool show that it streamlines form-to-chat conversion and produces conversational dialogues of at least the same quality as a purely manual approach.","claims":[{"public_id":"cl_f18ccccddb38fe95d1b3aee50967b20a","status":"active","text":"Document data is extracted by associating fields with their textual descriptions using metadata and lightweight visual analysis.","confidence":0.95,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_f18ccccddb38fe95d1b3aee50967b20a"},{"public_id":"cl_72bfc1f863d211e4b9824f4dd6e0b6b5","status":"active","text":"Field labels, spatial layout, and associated text are analyzed to group related fields into natural conversational units.","confidence":0.96,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_72bfc1f863d211e4b9824f4dd6e0b6b5"},{"public_id":"cl_862ecf1acf4d652c8f7c7bfea80fc439","status":"active","text":"Methods and interfaces are introduced for converting PDF and web-based documents that solicit user input into chat-based dialogues.","confidence":0.98,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_862ecf1acf4d652c8f7c7bfea80fc439"},{"public_id":"cl_b70215de8691beaf12b9e589dbf06de6","status":"active","text":"User studies indicate that the tool streamlines form-to-chat conversion and yields conversational dialogues of at least the same quality as a purely manual approach.","confidence":0.94,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_b70215de8691beaf12b9e589dbf06de6"}],"concepts":[{"public_id":"co_0ce6c3f20e1aa5459144208505374d0f","status":"active","name":"chat-based dialogues","description":"Conversational interfaces where information is collected through message exchanges.","types":["interface"],"aliases":["chat interfaces"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_0ce6c3f20e1aa5459144208505374d0f"},{"public_id":"co_31e5d7fec77ee4246d54945b1dee50aa","status":"active","name":"purely manual approach","description":"A baseline approach in which form-to-chat conversion is performed entirely by hand.","types":["baseline"],"aliases":["manual approach"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_31e5d7fec77ee4246d54945b1dee50aa"},{"public_id":"co_42ffc66e3294622f5dd167c1aec39e2d","status":"active","name":"user studies","description":"Evaluations involving users interacting with the FormYak tool.","types":["evaluation method"],"aliases":["study with users"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_42ffc66e3294622f5dd167c1aec39e2d"},{"public_id":"co_65fc7adc0aea153171d804d700b57d69","status":"active","name":"natural conversational units","description":"Groups of related form fields organized into question-sized pieces for conversation.","types":["representation"],"aliases":["conversational units"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_65fc7adc0aea153171d804d700b57d69"},{"public_id":"co_6fdd836ff2590c1b2e8d62dc63304eb6","status":"active","name":"metadata","description":"Structured auxiliary information used to help identify and relate document fields.","types":["data"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_6fdd836ff2590c1b2e8d62dc63304eb6"},{"public_id":"co_6ff56d00b3f7d7e00c30f565e0fff335","status":"active","name":"spatial layout","description":"The positional arrangement of fields and text on the document page.","types":["document structure"],"aliases":["layout"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_6ff56d00b3f7d7e00c30f565e0fff335"},{"public_id":"co_85a13681d394f09763f83182990e9b72","status":"active","name":"lightweight visual analysis","description":"A visual analysis process that uses limited document layout cues to support field extraction.","types":["method"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_85a13681d394f09763f83182990e9b72"},{"public_id":"co_a429e50b7598fdf721ec25bf5acfbb02","status":"active","name":"methods and interfaces","description":"The techniques and user-facing components used to transform documents into chat interactions.","types":["method","interface"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_a429e50b7598fdf721ec25bf5acfbb02"},{"public_id":"co_a781063564303dd7d35c517c5d5c5f37","status":"active","name":"form-to-chat conversion","description":"The process of transforming forms into conversational chat-based interactions.","types":["conversion task"],"aliases":["form conversion to chat"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_a781063564303dd7d35c517c5d5c5f37"},{"public_id":"co_b12b1786500b1161924bd001548c5c1d","status":"active","name":"field labels","description":"Text labels identifying form fields in the source documents.","types":["text element"],"aliases":["labels"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_b12b1786500b1161924bd001548c5c1d"},{"public_id":"co_c00014c13758e851fabb340310354c4d","status":"active","name":"PDF and web-based documents","description":"Documents in PDF or web format that solicit user input through fields or prompts.","types":["document"],"aliases":["forms","documents"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_c00014c13758e851fabb340310354c4d"},{"public_id":"co_dd1d42a5bfe67ad455c1c114e6a66814","status":"active","name":"document data","description":"Information extracted from the source documents, including fields and their descriptions.","types":["data"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_dd1d42a5bfe67ad455c1c114e6a66814"},{"public_id":"co_f8ac1e80f483d93fb2aae53be7a66c9a","status":"active","name":"FormYak","description":"A system for converting forms into conversational chat interfaces.","types":["system"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_f8ac1e80f483d93fb2aae53be7a66c9a"}],"external_ids":{"DOI":"10.1145/3209280.3229108","ArXiv":null,"PubMed":null,"PubMedCentral":null,"MAG":2894887592,"DBLP":"conf/doceng/CarterDCM18","ACL":null},"open_access":{"is_open_access":false,"pdf_url":null,"landing_url":"https://sah.borca.ai/papers/52198931","source":null,"pdf_url_source":null,"license":null,"reason":"pdf_url_not_indexed"},"reference_availability":{"status":"available","references_indexed":true,"full_text_available":false,"full_text_source":null,"count_basis":"semantic_scholar_metadata","extraction_status":"not_applicable","reason":null},"source":{"provider":"episteme2","base_corpus":"semantic_scholar_dump","freshness_mode":"unknown","basis":["semantic_scholar_metadata","postgres_metadata"],"limits":["paper metadata is based on indexed upstream scholarly datasets","claims and concepts are available only for extracted papers","absence of claims or concepts means no extracted graph data is available in this response"],"status":"available","degraded":false,"degraded_reasons":[],"diagnostics":{"status":"available","degraded":false,"degraded_reasons":[],"metadata_status":"available","graph_status":"available","abstract_status":"available"},"source_flags":1},"paper_id":632005,"paper_uid":"4917b448-818e-48c5-9742-e075adeb349e","canonical_identity":{"paper_id":632005,"paper_uid":"4917b448-818e-48c5-9742-e075adeb349e","identity_status":"available","lookup_basis":"semantic_scholar_external_id","compatibility_path":"corpus_id"},"url":"https://sah.borca.ai/papers/52198931"}