[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"pack-detail-phd-researcher-lit-code-zh":3,"seo:pack:phd-researcher-lit-code:zh":99},{"code":4,"message":5,"data":6},200,"操作成功",{"pack":7},{"slug":8,"icon":9,"tone":10,"status":11,"status_label":12,"title":13,"description":14,"items":15,"install_cmd":98},"phd-researcher-lit-code","🎓","#7C2D12","new","本周新建","博士研究者的文献 + 复现代码包","十件资产给真在做严肃文献综述、要把论文代码跑通的博士：Zotero、arXiv MCP、GPT Researcher、academic-researcher agent、Marker、Nougat、JupyterLab、Papermill、Overleaf、AI Scientist。文献检索 → 文献管理 → PDF 解析 → 阅读 → 复现 → 写作 一条龙。",[16,28,38,46,53,61,68,74,81,88],{"id":17,"uuid":18,"slug":19,"title":20,"description":21,"author_name":22,"view_count":23,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4612,"74dca4cd-5468-11f1-9bc6-00163e2b0d79","zotero-free-research-source-manager-citation-tool-74dca4cd","Zotero — Free Research Source Manager and Citation Tool","Zotero is a free, open-source reference management tool that helps you collect, organize, annotate, cite, and share research sources. Available on Windows, macOS, Linux, and iOS, it supports one-click saving from browsers and generates citations in thousands of styles.","AI Open Source",31,0,"en","skill","Skill",{"id":29,"uuid":30,"slug":31,"title":32,"description":33,"author_name":34,"view_count":35,"vote_count":24,"lang_type":25,"type":36,"type_label":37},3201,"c1c31b4a-7f40-4b0a-9304-122f7d9b00d1","arxiv-mcp-server-search-and-analyze-papers","arXiv MCP Server — Search and Analyze Papers","arxiv-mcp-server is an MCP server for searching and analyzing arXiv papers, with uvx\u002Fuv tool stdio launch examples for reproducible research workflows.","MCP Hub",68,"mcp","MCP",{"id":39,"uuid":40,"slug":41,"title":42,"description":43,"author_name":44,"view_count":45,"vote_count":24,"lang_type":25,"type":26,"type_label":27},25,"23330210-b26a-4d97-ad97-1735c203eaa6","gpt-researcher-autonomous-research-report-agent-23330210","GPT Researcher — Autonomous Research Report Agent","AI agent that generates detailed research reports from a single query. Searches multiple sources, synthesizes findings, and cites references.","TokRepo精选",565,{"id":47,"uuid":48,"slug":49,"title":50,"description":51,"author_name":44,"view_count":52,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4354,"ed4529f4-7ba4-4886-8c82-10e48d5a0a5f","claude-code-agent-academic-researcher-ed4529f4","Claude Code Agent: Academic Researcher","Academic research specialist for scholarly sources, peer-reviewed papers, and academic literature. Use PROACTIVELY for research paper analysis, literature reviews, citation...",35,{"id":54,"uuid":55,"slug":56,"title":57,"description":58,"author_name":59,"view_count":60,"vote_count":24,"lang_type":25,"type":26,"type_label":27},210,"42976daf-a56a-4152-9afb-d5b00d130a08","marker-convert-pdf-markdown-high-accuracy-42976daf","Marker — Convert PDF to Markdown with High Accuracy","Fast, accurate PDF to Markdown + JSON converter. Handles tables, images, equations, code blocks, and multi-column layouts. GPU-accelerated. 33K+ GitHub stars.","Script Depot",135,{"id":62,"uuid":63,"slug":64,"title":65,"description":66,"author_name":22,"view_count":67,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4670,"ed1264b8-54cb-11f1-9bc6-00163e2b0d79","nougat-neural-optical-understanding-academic-documents-ed1264b8","Nougat — Neural Optical Understanding for Academic Documents","Nougat is a visual transformer model from Meta that converts academic PDF pages into structured Markdown, accurately preserving mathematical equations, tables, and text formatting.",20,{"id":69,"uuid":70,"slug":71,"title":72,"description":73,"author_name":22,"view_count":35,"vote_count":24,"lang_type":25,"type":26,"type_label":27},2527,"4de315f7-4686-11f1-9bc6-00163e2b0d79","jupyterlab-next-generation-interactive-development-4de315f7","JupyterLab — Next-Generation Interactive Development Environment","The extensible web-based IDE for notebooks, code, and data from Project Jupyter, succeeding the classic Jupyter Notebook interface.",{"id":75,"uuid":76,"slug":77,"title":78,"description":79,"author_name":22,"view_count":80,"vote_count":24,"lang_type":25,"type":26,"type_label":27},2411,"4be4a73a-4492-11f1-9bc6-00163e2b0d79","papermill-parameterize-execute-jupyter-notebooks-4be4a73a","Papermill — Parameterize and Execute Jupyter Notebooks","Papermill is a Python tool for parameterizing, executing, and analyzing Jupyter notebooks programmatically, enabling notebook-based pipelines and report generation.",130,{"id":82,"uuid":83,"slug":84,"title":85,"description":86,"author_name":59,"view_count":87,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1808,"8d4b8be6-3c4d-11f1-9bc6-00163e2b0d79","overleaf-self-hosted-collaborative-latex-editor-8d4b8be6","Overleaf — Self-Hosted Collaborative LaTeX Editor","Overleaf is an open-source web-based LaTeX editor that enables real-time collaborative document editing. Self-host it with Docker to keep your academic papers and technical documents on your own infrastructure.",69,{"id":89,"uuid":90,"slug":91,"title":92,"description":93,"author_name":94,"view_count":95,"vote_count":24,"lang_type":25,"type":96,"type_label":97},622,"0a2623ca-92b3-4fba-82e0-fc9a7cda45bd","ai-scientist-automated-research-paper-generation-0a2623ca","AI Scientist — Automated Research Paper Generation","Fully automated AI system that conducts research, runs experiments, and writes complete scientific papers. Generates novel ideas, implements them, and produces LaTeX manuscripts. 12,000+ stars.","Prompt Lab",165,"prompt","Prompt","tokrepo install pack\u002Fphd-researcher-lit-code",{"pageType":100,"pageKey":8,"locale":101,"title":102,"metaDescription":103,"h1":104,"tldr":105,"bodyMarkdown":106,"faq":107,"schema":123,"internalLinks":129,"citations":142,"wordCount":155,"generatedAt":156},"pack","zh","博士研究者的文献 + 复现代码包 — 10 个工具搞定文献综述和论文代码复现","Zotero \u002F arXiv MCP \u002F GPT Researcher \u002F Claude academic-researcher agent \u002F Marker \u002F Nougat \u002F JupyterLab \u002F Papermill \u002F Overleaf \u002F AI Scientist — 给真的在做严肃文献综述、还想把论文里的代码跑起来的博士。按顺序装。","博士研究者的文献 + 复现代码包 — 10 件资产覆盖文献综述到代码复现","十个开源工具按安装顺序排好：文献检索 → 文献管理 → PDF 转干净 markdown → 阅读和摘要 → 跑论文代码的 notebook 环境 → 写论文的 LaTeX 环境。AI 是助手，不是「方法论部分不读了」的借口。","## 这个 pack 包含什么\n\n这是给已经过了「跟 ChatGPT 聊一聊我的研究方向」阶段、进入真正硬核工作的博士生 \u002F 博士后的工具栈：(a) 认真读 200 篇 paper，(b) 追踪谁引用了谁，(c) 真把作者放出来的代码跑通，(d) 最后写出能在答辩台上守住的东西。每个 pick 都**开源**、**活跃维护**、在 pipeline 里占一个明确的位置。\n\n这套 pack 的态度是：拒绝假装 AI 能替你读 methodology。AI 在以下环节进入循环 — **文献分诊**、**PDF 清洗**、**初步摘要**、**复现代码 debug**、**写作起草**。但博士如果不真读 methods，答辩日子不会好过。工具的排布刻意让 AI **不挡在你和论文之间**，只在周围打辅助。\n\n## 推荐安装顺序\n\n1. **Zotero** — 文献管理。博士第一天就装。浏览器插件一键抓 metadata + PDF，按 collection 归档，多设备同步，导出 BibTeX。如果从第一周就没建好引用的「唯一真相源」，第 36 个月一定还。\n2. **arXiv MCP Server** — 从 Claude \u002F Cursor \u002F 任何支持 MCP 的客户端里直接查 arXiv。搜索、抓 metadata 和全文、用一个 tool call 把论文喂给模型。替代「开浏览器 → 搜 → 复制 DOI → 粘回去」的循环。\n3. **GPT Researcher** — 自主文献综述 agent。给一句 query（「transformer scaling laws compute-optimal training」），它去多个来源搜，综合发现，引用参考，输出一份草稿 survey。当作**陌生子领域的第一张地图**用 — 绝对不当作最终引用列表。\n4. **Claude Code Agent: Academic Researcher** — 一个为学术工作流调优的 Claude Code subagent：结构化阅读论文、提取 methodology、遍历引用图。住在你的 Claude Code 项目里，prompt 和约定跟论文仓一起被版本控制。\n5. **Marker** — PDF → 干净 Markdown 转换器。AI 辅助阅读的最大 unlock。Marker 能处理数学、表格、图、多栏布局。一篇 40 页 paper 转一次 Markdown，之后任何 LLM 都能干净 ingest，OCR 噪音不会把 methodology 啃掉。\n6. **Nougat** — Meta 出的神经 OCR，专门在学术文档上训练。Marker 快而通用，Nougat 是**公式密集**论文（理论 ML、物理、数学）的重型选手。LaTeX-aware 输出。Marker 把关键证明搞糊时换 Nougat。\n7. **JupyterLab** — 真正跑论文代码、改代码、画变体、复核 claim 的 notebook IDE。多文档工作区、终端、文件浏览器。复现到底成不成功就在这里见分晓。\n8. **Papermill** — 命令行参数化执行 notebook。当你要把论文的超参在 12 个设置上 sweep 一遍、验证 headline 那张图不是单一 seed 的意外，这是关键工具。配 JupyterLab 做生产级实验跑。\n9. **Overleaf（自托管）** — 协作 LaTeX。真正写论文的地方。自托管版本让未发表的论文不出第三方服务器，对有严格 IP \u002F embargo 规则的领域很重要。BibTeX 直接从 Zotero 流进来。\n10. **AI Scientist** — Sakana AI 的端到端论文自动生成系统。不是用来生成你真正的论文（千万别），是看 AI 辅助科研写作前沿能力到哪一步的一份很有意思的参考，也是用来给 ablation 实验生成可重度编辑的初稿的实用工具。\n\n## 它们怎么协同（研究工作流）\n\n```\n  文献检索\n  ┌────────────────────────────────────┐\n  │ arXiv MCP ──► GPT Researcher       │\n  │  （精准）       （广度地图）         │\n  └─────────────────┬──────────────────┘\n                    ▼\n         ┌───────────────────┐\n         │  Zotero（真相源） │  ◄── BibTeX 出到 Overleaf\n         │  collection +     │\n         │  附 PDF           │\n         └─────────┬─────────┘\n                   ▼\n   PDF 解析 ┌─────────────────┐\n            │ Marker（快）    │\n            │ Nougat（公式）  │\n            └────────┬────────┘\n                     ▼ 干净 markdown\n        ┌─────────────────────────┐\n        │ Academic Researcher     │\n        │ Claude Code agent       │ ── 摘要、引用图、空白点\n        └──────────┬──────────────┘\n                   ▼\n         复现代码\n         ┌───────────────────┐\n         │ JupyterLab        │\n         │   + Papermill     │ ── seed sweep、ablation\n         └────────┬──────────┘\n                  ▼\n            写作\n         ┌───────────────────┐\n         │ Overleaf          │  ◄── Zotero 喂引用\n         │ + AI Scientist    │      （只用初稿 — 你来写）\n         └───────────────────┘\n```\n\n脊椎是 **Zotero 作为「你读过什么」的唯一真相源**。上游所有东西喂 Zotero，下游所有东西从 Zotero 读。没这个纪律，整条 pipeline 会腐化成 4000 个浏览器 tab 加一本你自己都复现不了的论文。\n\n## 你会遇到的取舍\n\n- **AI 摘要 vs 真读** — 这套 pack 最大的风险。GPT Researcher 和 Academic Researcher agent 能在 30 秒内把一篇论文摘要完。这个摘要**够你判断要不要读这篇论文**，**但当作 methodology 的替代品就极其危险**。硬规矩：博士论文里引用的 paper，methods 部分必须自己读完。AI 只用于分诊，不用于「凭感觉引用」。\n- **复现的天花板** — Papermill + JupyterLab 让你能干净跑作者放出来的代码，但很多论文放的代码早就跑不动了（依赖死了、weights 丢了、CUDA 版本不对）。给环境考古留时间。一切用 `conda env export` 锁死。如果某篇论文的 claim 在 rerun 时崩了，这值得在论文里写一条 footnote。\n- **Marker vs Nougat** — Marker 更快、表格处理得好；Nougat 更慢但能真正解析 LaTeX 公式。先跑 Marker；公式才是重点时再上 Nougat。\n- **自托管 Overleaf vs SaaS 版** — SaaS 版方便，但你的草稿在别人机器上。自托管在学校集群（或者就一个 Docker 容器）是未发表工作的正确选择。代价是一个下午的搭建。\n- **AI Scientist 当工具不当目标** — 用 AI 端到端生成论文学术上和伦理上都很危险。把它当作「AI 能到什么程度」的参考架构、当作 ablation 表格的初稿生成器 — 永远不当作绕过真正科学贡献的捷径。\n\n## 常见踩坑\n\n- **过度信任 AI 对 methodology 的摘要** — 摘要必然压缩；methodology 的细节（loss 公式、正则化、数据切分）正是被压缩掉的东西。审稿人偏偏就问这些被砍掉的细节。读 methods。\n- **Zotero PDF 散在各设备** — 第一天就打开 WebDAV \u002F 自己的同步目标。读博第 3 年发现一半带批注的 PDF 只在一台死了的笔记本上 — 经典博士恐怖故事。\n- **只看 notebook 就说复现** — 论文的 `figure_3.ipynb` 可能端到端能跑，但跳过了真正的训练。先看 notebook **干了什么**，再宣布「复现成功」。\n- **只在 arXiv 找文献** — arXiv 快，但偏向 ML \u002F 物理 \u002F 数学。生物、社科、人文大部分文献住在期刊里、只能通过学校权限读。arXiv MCP 用于 arXiv 覆盖的部分，别当万能源。\n- **BibTeX 条目重复** — Zotero 不阻止你在 arXiv 版和期刊版都点连接器，结果同一篇 paper 两条略不同的 metadata 进库。每章交接前跑一次去重。",[108,111,114,117,120],{"q":109,"a":110},"博士刚开始，真有必要第一天就装这 10 个吗？","不用 — 第一周装 Zotero、JupyterLab、Overleaf，这三件会变成肌肉记忆，迁移成本会复利。第二个月、找准子领域之后再加 arXiv MCP 和 academic-researcher agent。Marker、Nougat、Papermill、AI Scientist 等你撞上每个工具对应的具体问题再装 — 别给还不存在的问题预装解决方案。",{"q":112,"a":113},"AI agent 能替我做文献综述吗？","完全没法以「能扛过博士答辩」的程度做。GPT Researcher 和 academic-researcher agent 给陌生领域画第一张地图非常厉害 — 大致是大三本科生文献综述的质量。用它找出经典论文、识别主要流派，然后自己读那些论文。把 AI 生成的综述当作文献综述章节交上去，在大多数大学算抄袭，在所有大学都是学术自杀。",{"q":115,"a":116},"Marker 还是 Nougat — PDF 转文本工具先装哪个？","先装 Marker。它更快、表格图处理得好、90% 的论文都覆盖得过关。等你开始啃公式密集的理论论文，再加 Nougat — Nougat 专门在学术文档上训练，LaTeX 公式保留得好得多。两个都跑、按 paper 挑也行；存储和计算便宜，丢失公式不便宜。",{"q":118,"a":119},"我在 50 篇不同论文之间跑 notebook，怎么保证博士工作可复现？","三条规矩。(1) 每个复现住自己的目录，自己的 `environment.yml` 或 `requirements.txt` 锁死精确版本。(2) 用 Papermill 通过参数调 notebook 而不是原地改 — 源 notebook 保持干净，运行记录可审计。(3) 把执行后的 notebook + 输出和输入参数一起保存，两年后还能证明你跑过什么。conda 环境 + git + 一个装 Papermill 输出的 `RUNS\u002F` 目录解决 95% 的复现痛点。",{"q":121,"a":122},"用 AI Scientist 或 Claude 帮写博士论文符合学术伦理吗？","完全取决于你学校的政策和你诚实披露的程度。2026 年常见共识：AI 用于列大纲、改语法、压力测试想法、生成你之后大改的初稿是 OK 的 — 跟写作辅导老师做的事一样。AI 不 OK 的是：生成原创分析、伪造引用、生成你不改一字就交的段落。拿不准就在 methods 里披露。博士的意义是你能为每一句话辩护；如果你没法为 AI 写的一段话辩护，就别留。",{"@context":124,"@type":125,"name":13,"description":126,"numberOfItems":127,"inLanguage":128},"https:\u002F\u002Fschema.org","ItemList","十个为博士级文献综述、论文 PDF 管理、引用追踪、论文代码复现和学术写作精选的开源工具。",10,"zh-CN",[130,134,138],{"url":131,"anchor":132,"reason":133},"\u002Fzh\u002Fai-tools-for\u002Fresearch","AI 研究工作流工具","本 pack 之外更广的研究相关资产目录",{"url":135,"anchor":136,"reason":137},"\u002Fzh\u002Ftopics","浏览其它主题包","邻近 pack 覆盖 RAG、agent 记忆、second brain，跟文献综述有重叠",{"url":139,"anchor":140,"reason":141},"\u002Fzh\u002Ffeatured","TokRepo 精选资产","这十个工具属于更大的精选目录",[143,147,151],{"claim":144,"source_name":145,"source_url":146},"Zotero 是免费开源的引用管理器，带浏览器插件和 BibTeX 导出","Zotero 官网","https:\u002F\u002Fwww.zotero.org\u002F",{"claim":148,"source_name":149,"source_url":150},"Nougat 是专门在学术文档上训练的神经 OCR 系统","Nougat GitHub","https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fnougat",{"claim":152,"source_name":153,"source_url":154},"Papermill 用命令行参数化和执行 Jupyter notebook","Papermill 文档","https:\u002F\u002Fpapermill.readthedocs.io\u002F",890,"2026-05-22T00:00:00Z"]