#EXTM3U #EXT-SL-VERSION:1 #EXT-SL-ACCOUNT-ID:114360 #EXT-SL-PRESENTATION-ID:38995003 #EXT-SL-PRESENTATION-TITLE:Off-policy Reinforcement Learning with Optimistic Exploration and Distribution Correction #EXT-SL-PRESENTATION-UPDATED-AT:2023-10-20T19:39:26Z #EXT-SL-PLAYER-TYPE:video_slideshow #EXT-SL-PRESENTATION-THUMBNAIL:https://ma.slideslive.com/library/presentations/38995003/thumbnail/offpolicy-reinforcement-learning-with-optimistic-exploration-and-distribution-correction_yR3W8J_big.jpg #EXT-SL-SLIDESLIVE-LOGO-VISIBLE:false #EXT-SL-SLIDESLIVE-LOGO-LINKIFY:false #EXT-SL-CUSTOM-CMCD-TRACKING:false #EXT-SL-PLAYLIST-TYPE:vod #EXT-SL-PRESENTATION-MEDIA-SET-ID:238895 #EXT-SL-VOD-VIDEO-SERVICE-NAME:yoda #EXT-SL-VOD-VIDEO-ID:v3lenExIWmCm #EXT-SL-VOD-VIDEO-KEN-ENABLED: #EXT-SL-VOD-VIDEO-SERVERS:["sl-yoda-v2-stream-008-alpha.b-cdn.net","1159783934.rsc.cdn77.org","1511376917.rsc.cdn77.org","sl-yoda-v2-stream-008-beta.b-cdn.net"] #EXT-SL-VOD-SLIDES-XML-URL:https://s.slideslive.com/38995003/v3/38995003.xml?1668793191 #EXT-SL-VOD-SLIDES-JSON-URL:https://s.slideslive.com/38995003/v3/slides.json?1668793191 #EXT-SL-VOD-SUBTITLES:[{"name":"English","language":"en","subtitles_id":100279,"webvtt_url":"https://slideslive-video-subtitles.s3.amazonaws.com/100279/subtitles.vtt?X-Amz-Algorithm=AWS4-HMAC-SHA256\u0026X-Amz-Credential=AKIAXWNGJW2E2DUNCDBO%2F20241119%2Fus-east-1%2Fs3%2Faws4_request\u0026X-Amz-Date=20241119T053722Z\u0026X-Amz-Expires=86400\u0026X-Amz-SignedHeaders=host\u0026X-Amz-Signature=63b6b751c9677922d3800232f243af428544839314cec8422d0fb8ed71fe2e34"}]