{"corpus_id":29151213,"paper_sha":"47bc34ae6f5dc104bc289ae3bb4fa75ef75fbc21","doi":"10.1609/aaai.v31i1.10723","arxiv_id":null,"pmid":null,"pmcid":null,"mag_id":2604909019,"dblp_id":"conf/aaai/RenYNLYZ17","acl_id":null,"title":"Unsupervised Deep Learning for Optical Flow Estimation","year":2017,"publication_date":"2017-02-12","venue":"AAAI Conference on Artificial Intelligence","journal":{"name":null,"pages":"1495-1501","volume":null},"journal_issn":null,"journal_title":null,"publication_types":["JournalArticle","Conference"],"pubmed_pub_types":null,"s2_fields_of_study":["Computer Science"],"reference_count":44,"citation_count":318,"influential_citation_count":22,"is_open_access":true,"arxiv_categories":null,"arxiv_license":null,"arxiv_journal_ref":null,"mesh_headings":null,"chemicals":null,"comments_corrections":null,"source_flags":1,"s2_open_access_pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/10723/10582","s2_open_access_landing_url":"https://www.semanticscholar.org/paper/47bc34ae6f5dc104bc289ae3bb4fa75ef75fbc21","s2_open_access_license":null,"s2_open_access_status":"BRONZE","pmc_open_access_pdf_url":null,"pmc_open_access_landing_url":null,"pmc_open_access_license":null,"pmc_open_access_status":null,"unpaywall_open_access_pdf_url":null,"unpaywall_open_access_landing_url":null,"unpaywall_open_access_license":null,"unpaywall_open_access_status":null,"abstract":"\n \n Recent work has shown that optical flow estimation can be formulated as a supervised learning problem. Moreover, convolutional networks have been successfully applied to this task. However, supervised flow learning is obfuscated by the shortage of labeled training data. As a consequence, existing methods have to turn to large synthetic datasets for easily computer generated ground truth. In this work, we explore if a deep network for flow estimation can be trained without supervision. Using image warping by the estimated flow, we devise a simple yet effective unsupervised method for learning optical flow, by directly minimizing photometric consistency. We demonstrate that a flow network can be trained from end-to-end using our unsupervised scheme. In some cases, our results come tantalizingly close to the performance of methods trained with full supervision.\n \n","claims":[{"public_id":"cl_6086d39ae73e6c67b9ffa17cfe405cee","status":"active","text":"A flow network can be trained from end-to-end using the proposed unsupervised scheme without labeled training data.","confidence":0.95,"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_6086d39ae73e6c67b9ffa17cfe405cee"},{"public_id":"cl_36877830f7530f762f1009346ecfa638","status":"active","text":"An unsupervised method for learning optical flow is devised by using image warping by the estimated flow and directly minimizing photometric consistency.","confidence":0.97,"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_36877830f7530f762f1009346ecfa638"},{"public_id":"cl_6af4b921b471fb15a310a0319da9a49f","status":"active","text":"In some cases, the unsupervised approach achieves performance tantalizingly close to methods trained with full supervision.","confidence":0.9,"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_6af4b921b471fb15a310a0319da9a49f"},{"public_id":"cl_2d932897303fb8c8281631733b3c92b9","status":"active","text":"Supervised flow learning requires large synthetic datasets for ground truth due to the shortage of labeled training data.","confidence":0.88,"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/claims/cl_2d932897303fb8c8281631733b3c92b9"}],"concepts":[{"public_id":"co_1797e85f47647d5a6cb70bae00635ffc","status":"active","name":"convolutional networks","description":"Deep convolutional neural network architectures applied to the optical flow estimation task.","types":["model architecture"],"aliases":["CNNs","convolutional neural networks"],"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_1797e85f47647d5a6cb70bae00635ffc"},{"public_id":"co_2709a1bd42e1dbce368ce6de47c2d88b","status":"active","name":"synthetic datasets","description":"Artificially computer-generated datasets with automatic ground truth labels, used to train supervised optical flow methods.","types":["dataset"],"aliases":["synthetic training data"],"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_2709a1bd42e1dbce368ce6de47c2d88b"},{"public_id":"co_2fffb760b463b45005a3ad87b995251c","status":"active","name":"supervised optical flow learning","description":"The conventional approach to training optical flow networks using labeled ground truth data.","types":["method","baseline"],"aliases":["supervised flow learning"],"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_2fffb760b463b45005a3ad87b995251c"},{"public_id":"co_5a96075ffea1cb1802c13f22e7831f52","status":"active","name":"unsupervised optical flow learning","description":"A training approach for optical flow networks that requires no labeled ground truth, using self-supervised signals instead.","types":["method"],"aliases":["unsupervised flow learning"],"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_5a96075ffea1cb1802c13f22e7831f52"},{"public_id":"co_7778cdc648eb510b3b7f8251ed58ff1f","status":"active","name":"end-to-end training","description":"A training paradigm where the full network is optimized jointly from raw inputs to final outputs in a single learning process.","types":["training paradigm"],"aliases":["end-to-end learning"],"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_7778cdc648eb510b3b7f8251ed58ff1f"},{"public_id":"co_ab781040a28204bbe7dabdba0ed3fb1d","status":"active","name":"photometric consistency","description":"The constraint that corresponding pixels across warped image pairs should have consistent intensity values, used here as the unsupervised training objective.","types":["objective","constraint"],"aliases":["photometric loss"],"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_ab781040a28204bbe7dabdba0ed3fb1d"},{"public_id":"co_bb393ed37308700ef36f9c112b8b0727","status":"active","name":"optical flow estimation","description":"The task of estimating pixel-level motion between consecutive images, addressed here via deep learning.","types":["task"],"aliases":["flow estimation"],"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_bb393ed37308700ef36f9c112b8b0727"},{"public_id":"co_e7c74400906874e9a0ad99d249e09b08","status":"active","name":"image warping","description":"The technique of spatially transforming one image using the estimated optical flow to reconstruct another image, used here as a self-supervised training signal.","types":["technique"],"aliases":[],"contributors":[{"id":170,"public_id":"gsgmdx9r6e","public_label":"pupuri (gsgmdx9r6e)","roles":["extraction"],"url":"https://sah.borca.ai/u/gsgmdx9r6e"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":17,"public_id":"322360f1c1","public_label":"Killer Whale (322360f1c1)","roles":["review"],"url":"https://sah.borca.ai/u/322360f1c1"}],"url":"https://sah.borca.ai/concepts/co_e7c74400906874e9a0ad99d249e09b08"}],"external_ids":{"DOI":"10.1609/aaai.v31i1.10723","ArXiv":null,"PubMed":null,"PubMedCentral":null,"MAG":2604909019,"DBLP":"conf/aaai/RenYNLYZ17","ACL":null},"open_access":{"is_open_access":true,"pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/10723/10582","landing_url":"https://www.semanticscholar.org/paper/47bc34ae6f5dc104bc289ae3bb4fa75ef75fbc21","source":"semantic_scholar","pdf_url_source":"semantic_scholar_open_access_pdf","license":null,"status":"BRONZE","reason":null},"reference_availability":{"status":"available","references_indexed":true,"full_text_available":false,"full_text_source":null,"count_basis":"semantic_scholar_metadata","extraction_status":"not_applicable","reason":null},"source":{"provider":"episteme2","base_corpus":"semantic_scholar_dump","freshness_mode":"unknown","basis":["semantic_scholar_metadata","postgres_metadata"],"limits":["paper metadata is based on indexed upstream scholarly datasets","claims and concepts are available only for extracted papers","absence of claims or concepts means no extracted graph data is available in this response"],"status":"available","degraded":false,"degraded_reasons":[],"diagnostics":{"status":"available","degraded":false,"degraded_reasons":[],"metadata_status":"available","graph_status":"available","abstract_status":"available"},"source_flags":1},"paper_id":630798,"paper_uid":"3078cb5c-9358-41da-8719-671551abae9c","canonical_identity":{"paper_id":630798,"paper_uid":"3078cb5c-9358-41da-8719-671551abae9c","identity_status":"available","lookup_basis":"semantic_scholar_external_id","compatibility_path":"corpus_id"},"url":"https://sah.borca.ai/papers/29151213"}