{"id":445,"date":"2022-09-29T12:09:21","date_gmt":"2022-09-29T04:09:21","guid":{"rendered":"http:\/\/blog.cyasylum.top\/?p=445"},"modified":"2023-11-05T12:10:01","modified_gmt":"2023-11-05T04:10:01","slug":"diffusion-zpswxc","status":"publish","type":"post","link":"http:\/\/blog.cyasylum.top\/index.php\/2022\/09\/29\/diffusion-zpswxc\/","title":{"rendered":"Diffusion \u7b14\u8bb0"},"content":{"rendered":"<h1>Diffusion<\/h1>\n<p>CLIP + VQGAN + Diffusion = DALLE2<\/p>\n<p>\u200d<\/p>\n<p>\u6269\u6563\u6a21\u578b\uff0c\u57fa\u4e8e\u5206\u5e03\u7684\u601d\u60f3<\/p>\n<p>\u57fa\u4e8e\u89c4\u5f8b\u8fdb\u884c\u6269\u6563\u8fc7\u7a0b<\/p>\n<p><span style=\"font-weight: bold;\" data-type=\"strong\">GAN<\/span><\/p>\n<p>\u751f\u6210\u5668\u5224\u522b\u5668\u8fed\u4ee3\u903c\u8fd1\u751f\u6210\u76ee\u6807<\/p>\n<p>\u8868\u73b0\u535a\u5f08\u5bf9\u6297\u751f\u6210\u8fc7\u7a0b<\/p>\n<p>cons<\/p>\n<p>\u8bad\u7ec3\u591a\u4e2a\u7f51\u7edc\uff0c\u6536\u655b\u96be\u5ea6\u8f83\u5927<\/p>\n<p>\u635f\u5931\u96be\u4ee5\u89c2\u6d4b<\/p>\n<p>\u5bb9\u6613\u5b66\u4e60\u504f\u89c1\uff08\u9a97\u8fc7\u5224\u522b\u5668\uff09<\/p>\n<p>\u591a\u6837\u6027\u964d\u4f4e<\/p>\n<p><span style=\"font-weight: bold;\" data-type=\"strong\">Diffusion<\/span><\/p>\n<p><a href=\"https:\/\/arxiv.org\/abs\/2006.11239\" target=\"_blank\"  rel=\"nofollow\" >DDPM<\/a> \u539f\u6587<\/p>\n<p>Source\u76ee\u6807\u5b58\u5728\u7a33\u5b9a\u5206\u5e03<\/p>\n<p>\u901a\u8fc7\u8fed\u4ee3\u52a0\u566a\u97f3\u5b9e\u73b0\u6269\u6563\uff0c\u51fb\u788e\u5206\u5e03<\/p>\n<p>\u7f51\u7edc\u8bad\u7ec3\u53bb\u566a\u8fc7\u7a0b<\/p>\n<p>\u524d\u5411\u6269\u6563 + \u53cd\u5411\u8fd8\u539f = Diffusion\u6a21\u578b<\/p>\n<p>\u57fa\u4e8e\u65f6\u523b\u8fdb\u884c\u8fed\u4ee3<\/p>\n<p>\uff08\u566a\u58f0\u4f5c\u4e3a\u6807\u7b7e\uff09<\/p>\n<p>\u200d<\/p>\n<h1>\u52a0\u566a\u8fc7\u7a0b<\/h1>\n<p>$\\alpha_t = 1-\\beta_t$ 
\u5bf9\u4e8e\u6bcf\u4e2a\u65f6\u523b\uff0c\u65b0\u589e\u566a\u97f3\uff0c\u4fdd\u6301\u76f8\u540c\u7684<em>\u6269\u6563\u5e45\u5ea6<\/em>\uff0c\u6709$\\beta_t$\u4ece\u8d77\u59cb\u65f6\u523b\u8d77\u9010\u6e10\u589e\u5927<\/p>\n<p>\u901a\u8fc7$x _ { t } = \\sqrt { \\alpha _ { t } } x _ { t - 1 } + \\sqrt { 1 - \\alpha _ { t } } z _ { 1 }$ \u5bf9\u56fe\u50cf\u8fdb\u884c\u8fed\u4ee3\uff0c\u7531$\\alpha_t$\u63a7\u5236\u4e0a\u4e00\u65f6\u523b\u4fdd\u7559\u7684\u5e45\u5ea6\uff0c\u4ece\u800c\u63a7\u5236\u6574\u4f53\u6269\u6563\u5e45\u5ea6<\/p>\n<p>\uff08\u4e32\u884c\u8ba1\u7b97\u58a8\u8ff9\u96be\u4ee5\u8bad\u7ec3\uff0c\u524d\u5411\u8fc7\u7a0b\u5e72\u5c31\u5b8c\u4e8b\u4e86\uff09<\/p>\n<p>\u200d<\/p>\n<p>\u8bbe\u8ba1$\\alpha_t$\u6309\u5df2\u77e5\u9879\u7ebf\u6027\u8870\u51cf\uff0c\u5bf9\u4e0a\u8ff0\u8fed\u4ee3\u9879\u8fdb\u884c\u9012\u63a8<\/p>\n<p>$$x_t= \\sqrt { \\alpha _ { t } \\alpha _ { t - 1 } } x _ { t - 2 } + ( \\sqrt { \\alpha _ { t } ( 1 - \\alpha _ { t - 1 } ) } z _ { 2 } + \\sqrt { 1 - \\alpha _ { t } } z _ { 1 } )= \\sqrt { \\alpha _ { t } \\alpha _ { t - 1 } } x _ { t - 2 } + \\sqrt { 1 - \\alpha _ { t } \\alpha _ { t - 1 } } \\bar z _ { 2 }$$<\/p>\n<p>\u4f9d\u636e\u9ad8\u65af\u5206\u5e03\u4e58\u4ee5\u7cfb\u6570\u7b49\u4e8e\u91cd\u91c7\u6837\u6539\u53d8\u65b9\u5dee\u7684\u89c4\u5219\uff0c\u5bf9\u9ad8\u65af\u566a\u58f0\u9879\u8fdb\u884c\u5316\u7b80<\/p>\n<p>\u53ef\u5f97<span style=\"font-weight: bold;\" data-type=\"strong\">\u5feb\u901f\u8fed\u4ee3\u516c\u5f0f<\/span>\u6709<\/p>\n<p>$$x_t= \\sqrt{\\bar\\alpha_t}x_0 + \\sqrt{1-\\bar\\alpha_t} z_t$$<\/p>\n<p>\u5176\u4e2d$\\bar\\alpha_t$\u4e3a\u8fed\u4ee3\u7d2f\u4e58\u9879\uff0c\u800c$z_t$\u4ecd\u4e3a\u6807\u51c6\u6b63\u6001\u5206\u5e03<\/p>\n<p>\u200d<\/p>\n<h1>\u53bb\u566a\u8fc7\u7a0b<\/h1>\n<p><img decoding=\"async\" src=\"http:\/\/kodo.cyasylum.top\/siyuan\/202311051209091.png\" alt=\"image\" 
\/>\u200b<\/p>\n<p>\u6784\u5efa\u4e00\u4e2a\u5206\u5e03\u7684\u8fd8\u539f\u601d\u60f3$P(X_{t-1}|X_t)$<\/p>\n<p>\u8d1d\u53f6\u65af\u516c\u5f0f\uff0c\u5b9e\u73b0\u9006\u8fc7\u7a0b<\/p>\n<p>$$q ( x _ { t - 1 } | x _ { t } , x _ { 0 } ) = q ( x _ { t } | x _ { t  - 1} , x _ { 0 } ) \\frac { q ( x _ { t - 1 } | x _ { 0 } ) } { q ( x _ { t } | x _ { 0 } ) }$$<\/p>\n<p>\u5df2\u77e5\u52a0\u566a\u7684\u8fed\u4ee3\u65b9\u6cd5$q(x_t|x_{t-1},x_0) = \\sqrt { \\alpha _ { t } } x _ { t - 1 } + \\sqrt { 1 - \\alpha _ { t } } z$\uff0c\u4f7f\u7528\u8d1d\u53f6\u65af\u6c42\u9006\u5411\u53bb\u566a\u8fc7\u7a0b<\/p>\n<p>\u53c8\u5df2\u77e5$q(x_t|x_0),q(x_{t-1}|x_0)$\u5747\u53ef\u7531\u5feb\u901f\u8fed\u4ee3\u516c\u5f0f\u524d\u5411\u53d6\u5f97\uff0c\u56e0\u6b64\u9006\u5411\u8fc7\u7a0b\u7684\u76ee\u6807\u4e5f\u662f\u53ef\u6c42\u7684<\/p>\n<p>\u200d<\/p>\n<blockquote>\n<p>\u6709\u6982\u7387\u9879\u670d\u4ece\u4e0d\u540c\u6b63\u6001\u5206\u5e03<\/p>\n<p>$$\\begin{array}{lll}<br \/>\nq ( x _ { t - 1 } | x _ { 0 } )<br \/>\n&  \\sqrt{\\bar\\alpha_{t-1}}x_0 + \\sqrt{1-\\bar\\alpha_{t-1}} z<br \/>\n& \\sim\\mathcal N(\\sqrt{\\bar\\alpha_{t-1}}x_0, 1-\\bar\\alpha_{t-1}) \\\\<br \/>\nq ( x _ { t } | x _ { 0 } )<br \/>\n&    \\sqrt{\\bar\\alpha_t}x_0 + \\sqrt{1-\\bar\\alpha_t} z<br \/>\n& \\sim\\mathcal N(\\sqrt{\\bar\\alpha_{t}}x_0, 1-\\bar\\alpha_{t})\\\\<br \/>\nq ( x _ { t } | x _ { t  - 1} , x _ { 0 } )<br \/>\n&  \\sqrt { \\alpha _ { t } } x _ { t - 1 } + \\sqrt { 1 - \\alpha _ { t } } z<br \/>\n& \\sim\\mathcal N(\\sqrt{\\alpha_{t}}x_{t-1}, 1-\\alpha_{t}) <br \/>\n\\end{array}$$<\/p>\n<p>\u7531\u6b64\uff0c\u6211\u4eec\u53ef\u5f97$q ( x _ { t - 1 } | x _ { t } , x _ { 0 } )$\u7684\u6b63\u6001\u5206\u5e03\u5217<\/p>\n<p>$$\\propto \\exp ( - \\frac { 1 } { 2 } (<br \/>\n\\frac { ( x _ { t } - \\sqrt { \\alpha _ { t } } x _ { t  - 1} ) ^ { 2 } } { \\beta _ { t } }<br \/>\n+ \\frac { ( x _ { t - 1 }- \\sqrt { \\bar \\alpha _ { t - 1 } }x _ { 0}  ) ^ { 2 }  } { 1 
-\\bar\\alpha_{t-1}}<br \/>\n- \\frac { ( x _ { t } - \\sqrt { \\bar \\alpha _ { t } }x _ { 0}  ) ^ { 2 }  } { 1 -\\bar\\alpha_{t}}<br \/>\n))$$<\/p>\n<p>\u5316\u7b80\u5c55\u5f00\u5408\u5e76\u540c\u7c7b\u9879\uff0c\u5c06\u9700\u8981\u8ba1\u7b97\u7684\u6838\u5fc3\u76ee\u6807$x_{t-1}$\u6574\u7406\u5f97<\/p>\n<p>$$=\\exp(- \\frac{1}{2}(<br \/>\n(\\frac{\\alpha _{t}}{\\beta _{t}}+ \\frac{1}{1- \\bar{\\alpha}_{t-1}})x_{t-1}^{2}<br \/>\n-(<br \/>\n\\frac{2 \\sqrt{\\alpha _{t}}}{\\beta _{t}}x_{t}<br \/>\n+ \\frac{2 \\sqrt{\\bar\\alpha_{t-1}}}{1- \\bar{\\alpha}_{t-1}}x_{0}<br \/>\n)x_{t-1}<br \/>\n+C(x_{t},x_{0})))$$<\/p>\n<p>\u53c8\u6709\u6b63\u6001\u5206\u5e03\u7684\u5206\u89e3\u516c\u5f0f\u4e3a<\/p>\n<p>$$\\exp(- \\frac{(x- \\mu)^{2}}{2 \\sigma ^{2}})=\\exp(- \\frac{1}{2}(\\frac{1}{\\sigma ^{2}}x^{2}- \\frac{2 \\mu}{\\sigma ^{2}}x+ \\frac{\\mu ^{2}}{\\sigma ^{2}}))$$<\/p>\n<p>\u6bd4\u8f83\u4e0a\u4e0b\u4e24\u5f0f\u5173\u7cfb\uff0c\u53ef\u5f97\u5747\u503c\u4e0e\u65b9\u5dee\uff08\u6c42\u5747\u503c\u5305\u542b\u5bf9$x_0$\u7684\u4f30\u8ba1\uff09<\/p>\n<p>$$\\tilde{\\sigma}_{t}^{2}= \\frac{1- \\bar{\\alpha}_{t-1}}{1- \\bar{\\alpha}_{t}}\\beta _{t}$$<\/p>\n<p>$$\\tilde{\\mu}_{t}= \\frac{1}{\\sqrt{\\alpha _{t}}}(x_{t}- \\frac{\\beta _{t}}{\\sqrt{1- 
\\bar{\\alpha}_{t}}}{z}_{t})$$\n<\/p><\/blockquote>\n<p>\u7531\u6b64\uff0c\u7ed9\u5b9a$x_t$\u548c\u4e00\u7ec4\u5206\u5e03\uff0c\u6211\u4eec\u80fd\u53d6\u5f97$x_{t-1}$\u5206\u5e03\u7684\u5747\u503c\u548c\u65b9\u5dee\uff0c\u4ece\u800c\u8fdb\u884c\u8fd8\u539f<\/p>\n<p>\u800c\u5176\u4e2d\u5269\u4e0b\u6700\u540e\u4e00\u4e2a\u5173\u952e\u91cf\u4e3a\u566a\u58f0\u5206\u5e03$z_t$\uff0c\u65e0\u6cd5\u663e\u5f0f\u6c42\u89e3\uff0c\u5219\u8bad\u7ec3\u6a21\u578b\u9884\u6d4b$x_t$\u65f6\u523b\u6dfb\u52a0\u7684\u566a\u58f0$z_t$<\/p>\n<p>\u200d<\/p>\n<h1>\u566a\u58f0\u4f30\u8ba1<\/h1>\n<p>\u7531\u524d\u5411\u8fc7\u7a0b\u63d0\u4f9b\u6807\u7b7e\uff08\u4ee5\u566a\u58f0\u7684\u5f62\u5f0f\uff09\uff0c\u5728\u53cd\u5411\u8fc7\u7a0b\u4e2d\u4f9d\u636e\u6807\u7b7e\u9884\u6d4b<\/p>\n<p>\u4f7f\u7528Unet\u7ed3\u6784\u548cTransformer\u7b49\u8fdb\u884c\u566a\u58f0\u4f30\u8ba1<\/p>\n<p><img decoding=\"async\" src=\"http:\/\/kodo.cyasylum.top\/siyuan\/202311051209486.png\" alt=\"image\" \/>\u200b<\/p>\n<p><span style=\"font-weight: bold;\" data-type=\"strong\">\u8bad\u7ec3\u6d41\u7a0b<\/span><\/p>\n<p>\u968f\u673a\u8fdb\u884c\u65f6\u523b\u6269\u6563\u548c\u8ba1\u7b97\uff0c\u53d6\u968f\u673a\u566a\u58f0<\/p>\n<p>\u5b66\u4e60\u6a21\u578b$\\epsilon_\\theta (X_t,t)$\uff0c\u7528\u4e8e\u5b66\u4e60t\u65f6\u523b\u7684\u566a\u58f0\uff08\u5176\u4e2d$X_t$\u4e3a\u52a0\u566a\u540e\u56fe\u50cf\uff09\uff0c\u53d6\u771f\u5b9e\u503c\u4e0e\u9884\u6d4b\u503c\u505a\u635f\u5931\u68af\u5ea6\u4e0b\u964d<\/p>\n<p>\u4f7f\u7528\u6a21\u578b\u5b66\u4e60\u5982\u4f55\u4e3a\u7f51\u7edc\u6dfb\u52a0\u566a\u58f0<\/p>\n<p><span style=\"font-weight: bold;\" data-type=\"strong\">\u91c7\u6837\u6d41\u7a0b<\/span><\/p>\n<p>\u968f\u673a\u53d6\u51fa\u6807\u51c6\u6b63\u6001\u7eaf\u566a\u58f0$X_T$<\/p>\n<p>\u5faa\u73af\u4f9d\u636e\u9884\u6d4b\u566a\u58f0\u548c\u9006\u8fc7\u7a0b\u7684\u8fed\u4ee3\u516c\u5f0f\u5bf9\u4e0a\u4e00\u65f6\u523b\u8fdb\u884c\u6c42\u89e3<\/p>\n<p>$$x_{t-1}= \\frac{1}{\\sqrt{\\alpha _{t}}}(x_{t}- \\frac{1- \\alpha 
_{t}}{\\sqrt{1- \\overline{\\alpha}_{t}}}\\epsilon _{\\theta}(x_{t},t))+ \\sigma _{t}z$$<\/p>\n<p>\uff08\u6700\u540e\u7684$\\sigma_t z$\u5e94\u8be5\u662f\u5f53\u524d\u65f6\u523b\u65b9\u5dee\u7684\u6b63\u5219\u5316\u9879\uff0c\u5373\u9ad8\u65af\u5206\u5e03\u91cd\u91c7\u6837\uff0c\u6240\u4ee5\u5728\u6700\u540e\u6b65\u9aa4\u4e2d\u907f\u514d\u52a0\u5165\uff09<\/p>\n<p>\uff08\u91cd\u53c2\u6570\u6280\u5de7\uff0c\u89e3\u51b3\u68af\u5ea6\u4e0d\u80fd\u53cd\u5411\u4f20\u64ad\uff09<\/p>\n<p>\u53e6\u6709$\\epsilon_\\theta$\u8d8b\u8fd1\u4e8e\u670d\u4ece\u6807\u51c6\u6b63\u6001\u5206\u5e03<\/p>\n<p>\u200d<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Diffusion CLIP + VQGAN + Diffusion = DALLE2 \u200d \u6269\u6563\u6a21\u578b\uff0c\u57fa\u4e8e\u5206\u5e03\u7684\u601d\u60f3 \u57fa\u4e8e\u89c4\u5f8b\u8fdb\u884c\u6269 &#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"emotion":"","emotion_color":"","title_style":"","license":"","footnotes":""},"categories":[1],"tags":[],"class_list":["post-445","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"_links":{"self":[{"href":"http:\/\/blog.cyasylum.top\/index.php\/wp-json\/wp\/v2\/posts\/445","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/blog.cyasylum.top\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/blog.cyasylum.top\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/blog.cyasylum.top\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/blog.cyasylum.top\/index.php\/wp-json\/wp\/v2\/comments?post=445"}],"version-history":[{"count":2,"href":"http:\/\/blog.cyasylum.top\/index.php\/wp-json\/wp\/v2\/posts\/445\/revisions"}],"predecessor-version":[{"id":449,"href":"http:\/\/blog.cyasylum.top\/index.php\/wp-json\/wp\/v2\/posts\/445\/revisions\/449"}],"wp
:attachment":[{"href":"http:\/\/blog.cyasylum.top\/index.php\/wp-json\/wp\/v2\/media?parent=445"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/blog.cyasylum.top\/index.php\/wp-json\/wp\/v2\/categories?post=445"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/blog.cyasylum.top\/index.php\/wp-json\/wp\/v2\/tags?post=445"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}