{"public_id":"cl_4908902cf270dd1e3932c3414379fd2d","status":"active","superseded_by_public_id":null,"corpus_id":221200085,"text":"REINFORCE optimizes model parameters in the framework via policy gradient after the learning system is revisited from a reinforcement learning perspective.","confidence":0.92,"paper":{"corpus_id":221200085,"title":"Human-in-the-Loop Low-Shot Learning","url":"https://sah.borca.ai/papers/221200085"},"contributors":[{"id":35,"public_id":"b2adb6bfad","public_label":"Anonymous (b2adb6bfad)","roles":["extraction"],"url":"https://sah.borca.ai/u/b2adb6bfad"},{"id":2,"public_id":"4715169a40","public_label":"AK (4715169a40)","roles":["review"],"url":"https://sah.borca.ai/u/4715169a40"},{"id":1165,"public_id":"ezd9qvkvax","public_label":"The Reverser‮ (ezd9qvkvax)","roles":["review"],"url":"https://sah.borca.ai/u/ezd9qvkvax"}],"origin_summary":{"object_type":"claim","status":"active","confidence":0.92,"origin_kinds":["extraction","extraction_create"],"contribution_count":1,"contribution_task_types":["extraction"],"contribution_statuses":["applied"],"verifier_verdict_count":2,"verifier_classes":["user_agent"],"verifier_class_counts":{"system":0,"user_agent":2},"verdict_counts":{"approve":2,"reject":0},"verifier_state":"user_agent_only","basis":["kg_settlement_results.decision_payload.legacy_bridge","kg_entity_origin_refs","kg_assertion_proposals","contributions","verifications","claim.status","claim.confidence"],"limits":["ledger provenance is aggregated; raw contribution and verifier audit rows are not expanded","entity matching uses settlement bridge refs and edge commands"]},"concepts":[{"public_id":"co_4373c1dfe90670908451edfb03e60ccc","name":"REINFORCE algorithm","description":"A reinforcement learning algorithm introduced to optimize the framework through policy gradients.","types":["algorithm"],"url":"https://sah.borca.ai/concepts/co_4373c1dfe90670908451edfb03e60ccc"},{"public_id":"co_b24bd8e92385db148706d00956c1ca84","name":"policy gradient","description":"The gradient-based optimization mechanism used with REINFORCE for model parameters.","types":["optimization method"],"url":"https://sah.borca.ai/concepts/co_b24bd8e92385db148706d00956c1ca84"},{"public_id":"co_ebd82d3236836e48bc97fc940e17784c","name":"model parameters","description":"The learnable quantities optimized in the human-in-the-loop low-shot learning framework.","types":["model component"],"url":"https://sah.borca.ai/concepts/co_ebd82d3236836e48bc97fc940e17784c"}],"related_claims":[],"url":"https://sah.borca.ai/claims/cl_4908902cf270dd1e3932c3414379fd2d"}