{"corpus_id":63206654,"paper_sha":"ff22b793dcb12fd20393b4bfaf6ee2a823ea880a","doi":"10.1016/J.PROTCY.2016.05.140","arxiv_id":null,"pmid":null,"pmcid":null,"mag_id":2474174150,"dblp_id":null,"acl_id":null,"title":"A Novel Segmentation and Skew Correction Approach for Handwritten Malayalam Documents","year":2016,"publication_date":null,"venue":"","journal":{"name":"Procedia Technology","pages":"1341-1348","volume":"24"},"journal_issn":null,"journal_title":null,"publication_types":[],"pubmed_pub_types":null,"s2_fields_of_study":["Computer Science"],"reference_count":19,"citation_count":14,"influential_citation_count":0,"is_open_access":false,"arxiv_categories":null,"arxiv_license":null,"arxiv_journal_ref":null,"mesh_headings":null,"chemicals":null,"comments_corrections":null,"source_flags":1,"s2_open_access_pdf_url":null,"s2_open_access_landing_url":null,"s2_open_access_license":null,"s2_open_access_status":null,"pmc_open_access_pdf_url":null,"pmc_open_access_landing_url":null,"pmc_open_access_license":null,"pmc_open_access_status":null,"unpaywall_open_access_pdf_url":null,"unpaywall_open_access_landing_url":null,"unpaywall_open_access_license":null,"unpaywall_open_access_status":null,"abstract":"Abstract Segmentation of handwritten document images is a complex task due to the variability in the writing styles. The segmentation technique has to deal with non-uniformly skewed, overlapped and touching lines. A very few works have been carried out yet, addressing these issues. This paper presents a novel methodology for segmenting handwritten Malayalam documents into its constituent lines, words and characters addressing the issues mentioned. Water flow technique is used in extraction of text lines. An algorithm has been proposed for dealing with touching and overlapping lines. Words from the text lines are detected using Spiral Run Length Smearing Algorithm (SRLSA). Further, skew correction is done on extracted words and the skew corrected words are produced for character segmentation. Skew correction is incorporated for ease of the recognition stage in handwritten Malayalam OCR.","claims":[{"public_id":"cl_859f5e3d16537ad9f6f3747039a31869","status":"active","text":"A novel methodology segments handwritten Malayalam documents into text lines, words, and characters while addressing non-uniform skew, overlapping lines, and touching lines.","confidence":0.98,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_859f5e3d16537ad9f6f3747039a31869"},{"public_id":"cl_78a324ac606f2c4ba2d534e5ca3d2630","status":"active","text":"Skew correction is applied to extracted words before character segmentation to support handwritten Malayalam OCR recognition.","confidence":0.96,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_78a324ac606f2c4ba2d534e5ca3d2630"},{"public_id":"cl_35b565a139c17ded4a23f8f50b1cd706","status":"active","text":"The Spiral Run Length Smearing Algorithm is used to detect words from the extracted text lines.","confidence":0.97,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_35b565a139c17ded4a23f8f50b1cd706"},{"public_id":"cl_760648bd3caa4a63d914051ef713fe9e","status":"active","text":"The water flow technique is used to extract text lines from handwritten document images.","confidence":0.95,"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/claims/cl_760648bd3caa4a63d914051ef713fe9e"}],"concepts":[{"public_id":"co_55bee378cbaf3fb43aefb4f6ff2b8042","status":"active","name":"words","description":"Word-level segments detected from handwritten text lines.","types":["text unit"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_55bee378cbaf3fb43aefb4f6ff2b8042"},{"public_id":"co_56bfdfe7a93d36634397b72075b50134","status":"active","name":"handwritten document images","description":"Images of documents containing handwritten text used as input for segmentation.","types":["image data"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_56bfdfe7a93d36634397b72075b50134"},{"public_id":"co_58b8462265c22db034bea6d33474d9b7","status":"active","name":"Spiral Run Length Smearing Algorithm","description":"A word-detection algorithm applied to handwritten text lines.","types":["algorithm"],"aliases":["SRLSA"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_58b8462265c22db034bea6d33474d9b7"},{"public_id":"co_5d76e31038fd8445769306732f963b05","status":"active","name":"text lines","description":"Continuous horizontal groups of handwritten text that form line-level units in a document.","types":["text unit"],"aliases":["lines"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_5d76e31038fd8445769306732f963b05"},{"public_id":"co_7a4afa9d074c1b52f323856971bb97af","status":"active","name":"character segmentation","description":"The process of splitting a word image into individual character components.","types":["segmentation task"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_7a4afa9d074c1b52f323856971bb97af"},{"public_id":"co_902aa074c71c008319c68b8fb3b4fb61","status":"active","name":"non-uniformly skewed, overlapped and touching lines","description":"Line configurations in handwritten documents where text lines are tilted unevenly, overlap, or touch each other.","types":["document image property"],"aliases":["touching lines","overlapped lines","non-uniform skew"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_902aa074c71c008319c68b8fb3b4fb61"},{"public_id":"co_9301f5d4037b8a55e89f5a22793bf020","status":"active","name":"handwritten Malayalam OCR","description":"Optical character recognition for handwritten Malayalam text.","types":["application"],"aliases":["Malayalam OCR"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_9301f5d4037b8a55e89f5a22793bf020"},{"public_id":"co_ad844453ca180a20ba5bff8fc26c4995","status":"active","name":"segmentation methodology","description":"An approach for dividing handwritten document images into lines, words, and characters.","types":["method"],"aliases":["novel methodology","segmentation technique"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_ad844453ca180a20ba5bff8fc26c4995"},{"public_id":"co_bacb7bb2017e995c7932e3a3b6e56e49","status":"active","name":"water flow technique","description":"A line-extraction technique used to identify text lines in handwritten documents.","types":["method"],"aliases":[],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_bacb7bb2017e995c7932e3a3b6e56e49"},{"public_id":"co_d2aee16ad21c5dd25815d8daa2049c32","status":"active","name":"skew correction","description":"A preprocessing step that adjusts the orientation of word images to reduce slant or tilt.","types":["preprocessing method"],"aliases":["deskewing"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_d2aee16ad21c5dd25815d8daa2049c32"},{"public_id":"co_fec2d16423d3e61949e6102bfcc37c37","status":"active","name":"handwritten Malayalam documents","description":"Document images containing handwritten text in the Malayalam script.","types":["document type"],"aliases":["handwritten Malayalam document images"],"contributors":[{"id":1,"public_id":"12632b8b5f","public_label":"Anonymous (12632b8b5f)","roles":["extraction"],"url":"https://sah.borca.ai/u/12632b8b5f"}],"url":"https://sah.borca.ai/concepts/co_fec2d16423d3e61949e6102bfcc37c37"}],"external_ids":{"DOI":"10.1016/J.PROTCY.2016.05.140","ArXiv":null,"PubMed":null,"PubMedCentral":null,"MAG":2474174150,"DBLP":null,"ACL":null},"open_access":{"is_open_access":false,"pdf_url":null,"landing_url":"https://sah.borca.ai/papers/63206654","source":null,"pdf_url_source":null,"license":null,"reason":"pdf_url_not_indexed"},"reference_availability":{"status":"available","references_indexed":true,"full_text_available":false,"full_text_source":null,"count_basis":"semantic_scholar_metadata","extraction_status":"not_applicable","reason":null},"source":{"provider":"episteme2","base_corpus":"semantic_scholar_dump","freshness_mode":"unknown","basis":["semantic_scholar_metadata","postgres_metadata"],"limits":["paper metadata is based on indexed upstream scholarly datasets","claims and concepts are available only for extracted papers","absence of claims or concepts means no extracted graph data is available in this response"],"status":"available","degraded":false,"degraded_reasons":[],"diagnostics":{"status":"available","degraded":false,"degraded_reasons":[],"metadata_status":"available","graph_status":"available","abstract_status":"available"},"source_flags":1},"paper_id":631431,"paper_uid":"9375560f-59fc-475d-944c-6bef60633926","canonical_identity":{"paper_id":631431,"paper_uid":"9375560f-59fc-475d-944c-6bef60633926","identity_status":"available","lookup_basis":"semantic_scholar_external_id","compatibility_path":"corpus_id"},"url":"https://sah.borca.ai/papers/63206654"}