[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"workflow-asset-edfd1172":3,"seo:featured-workflow:edfd1172-4ea3-11f1-9bc6-00163e2b0d79:zh":84,"workflow-related-asset-edfd1172-edfd1172-4ea3-11f1-9bc6-00163e2b0d79":85},{"id":4,"uuid":5,"slug":6,"title":7,"description":8,"author_id":9,"author_name":10,"author_avatar":11,"token_estimate":12,"time_saved":12,"model_used":13,"fork_count":12,"vote_count":12,"view_count":12,"parent_id":12,"parent_uuid":13,"lang_type":14,"steps":15,"tags":22,"has_voted":28,"visibility":18,"share_token":13,"is_featured":12,"content_hash":29,"asset_kind":30,"target_tools":31,"install_mode":35,"entrypoint":19,"risk_profile":36,"dependencies":38,"verification":44,"agent_metadata":47,"agent_fit":60,"trust":72,"provenance":81,"created_at":83,"updated_at":83},3571,"edfd1172-4ea3-11f1-9bc6-00163e2b0d79","asset-edfd1172","OmniParser — Screen Parsing Toolkit for AI Agents","OmniParser by Microsoft Research converts screenshots into structured data that AI agents can understand and act upon, enabling vision-based GUI automation across desktop and web applications.","8a911193-3180-11f1-9bc6-00163e2b0d79","AI Open Source","https:\u002F\u002Ftokrepo.com\u002Fapple-touch-icon.png",0,"","en",[16],{"id":17,"step_order":18,"title":19,"description":13,"prompt_template":20,"variables":13,"depends_on":21,"expected_output":13},4131,1,"OmniParser Overview","# OmniParser — Screen Parsing Toolkit for AI Agents\n\n## Quick Use\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FOmniParser.git\ncd OmniParser\npip install -r requirements.txt\npython run_demo.py --screenshot path\u002Fto\u002Fscreenshot.png\n```\n\n## Introduction\nOmniParser is a screen parsing library from Microsoft Research that extracts structured information from UI screenshots. It identifies interactive elements, labels, and layout structure so that vision-language models can plan and execute actions on any graphical interface.\n\n## What OmniParser Does\n- Detects clickable UI elements (buttons, links, inputs) in screenshots\n- Extracts text labels and associates them with detected regions\n- Outputs structured bounding-box data for downstream agent use\n- Works across web pages, desktop apps, and mobile UIs\n- Integrates with vision-language models for end-to-end automation\n\n## Architecture Overview\nOmniParser combines a fine-tuned object detection model for UI element localization with an OCR module for text extraction. The detection model identifies interactable regions while the OCR module reads labels and content. Results are merged into a unified structured output that maps each element to its screen coordinates and semantic label.\n\n## Self-Hosting & Configuration\n- Clone the repo and install Python dependencies\n- Download pre-trained model weights via the provided script\n- Run the demo server for interactive testing\n- Configure detection thresholds via command-line arguments\n- GPU recommended for real-time parsing; CPU mode available\n\n## Key Features\n- Generalizes across different UI frameworks and platforms\n- Provides pixel-accurate bounding boxes for each element\n- Combines detection and OCR in a single pipeline\n- Open weights released under permissive license\n- Benchmarked on ScreenSpot and other UI grounding datasets\n\n## Comparison with Similar Tools\n- **SeeClick** — click prediction model; OmniParser provides full element parsing\n- **CogAgent** — end-to-end GUI agent; OmniParser is a modular parsing component\n- **Ferret-UI** — Apple multimodal model; OmniParser focuses on structured output\n- **Set-of-Mark** — visual prompting overlay; OmniParser directly detects elements\n\n## FAQ\n**Q: What image formats does OmniParser accept?**\nA: Standard formats including PNG, JPEG, and BMP are supported.\n\n**Q: Does it require a GPU?**\nA: GPU is recommended for speed but CPU inference is supported at lower throughput.\n\n**Q: Can OmniParser handle dynamic web pages?**\nA: It operates on static screenshots, so you capture the current state and parse it. For dynamic content, take sequential screenshots.\n\n**Q: How accurate is element detection?**\nA: It achieves strong results on standard UI grounding benchmarks. Accuracy varies by UI complexity.\n\n## Sources\n- https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FOmniParser\n- https:\u002F\u002Fwww.microsoft.com\u002Fen-us\u002Fresearch\u002Fproject\u002Fomniparser\u002F","0",[23],{"id":24,"name":25,"slug":26,"icon":27},12,"Configs","config","⚙️",false,"1d224ca2d3d02473b27218aaf981d88061c74641c15a74e348d64cdd732acdb2","skill",[32,33,34],"claude_code","codex","gemini_cli","single",{"executes_code":28,"modifies_global_config":28,"requires_secrets":37,"uses_absolute_paths":28,"network_access":28},[],{"npm":39,"pip":40,"brew":42,"system":43},[],[41],"requirements.txt",[],[],{"commands":45,"expected_files":46},[],[19],{"asset_kind":30,"target_tools":48,"install_mode":35,"entrypoint":19,"risk_profile":49,"dependencies":51,"content_hash":29,"verification":56,"inferred":59},[32,33,34],{"executes_code":28,"modifies_global_config":28,"requires_secrets":50,"uses_absolute_paths":28,"network_access":28},[],{"npm":52,"pip":53,"brew":54,"system":55},[],[41],[],[],{"commands":57,"expected_files":58},[],[19],true,{"target":33,"score":61,"status":62,"policy":63,"why":64,"asset_kind":30,"install_mode":35},98,"native","allow",[65,66,67,68,69,70,71],"target_tools includes codex","asset_kind skill","install_mode single","markdown-only","policy allow","safe markdown-only Codex install","trust established",{"author_trust_level":73,"verified_publisher":28,"asset_signed_hash":29,"signature_status":74,"install_count":12,"report_count":12,"dangerous_capability_badges":75,"review_status":76,"signals":77},"established","hash_only",[],"unreviewed",[78,79,80],"author has published assets","content hash available","no dangerous capability badges",{"owner_uuid":9,"owner_name":10,"source_url":82,"content_hash":29,"visibility":18,"created_at":83,"updated_at":83},"https:\u002F\u002Ftokrepo.com\u002Fen\u002Fworkflows\u002Fasset-edfd1172","2026-05-13 16:15:40",null,[86,146,193,238],{"id":87,"uuid":88,"slug":89,"title":90,"description":91,"author_id":9,"author_name":10,"author_avatar":11,"token_estimate":12,"time_saved":12,"model_used":13,"fork_count":12,"vote_count":12,"view_count":92,"parent_id":12,"parent_uuid":13,"lang_type":14,"steps":93,"tags":94,"has_voted":28,"visibility":18,"share_token":13,"is_featured":12,"content_hash":96,"asset_kind":30,"target_tools":97,"install_mode":35,"entrypoint":98,"risk_profile":99,"dependencies":101,"verification":106,"agent_metadata":109,"agent_fit":121,"trust":130,"provenance":134,"created_at":136,"updated_at":137,"__relatedScore":138,"__relatedReasons":139,"__sharedTags":144},2630,"ad0db703-47b4-11f1-9bc6-00163e2b0d79","asset-ad0db703","Screenpipe — 24\u002F7 Local Screen and Mic Recording for AI Agents","An open-source Rust application that continuously captures your screen and microphone locally, creating a searchable personal data layer that AI agents can query and act upon.",58,[],[95],{"id":24,"name":25,"slug":26,"icon":27},"e444b5c3deafa9701ec89bb8b43cd9ed3ef511db6eee343e2940b1d06333a058",[32],"Screenpipe",{"executes_code":28,"modifies_global_config":28,"requires_secrets":100,"uses_absolute_paths":28,"network_access":59},[],{"npm":102,"pip":103,"brew":104,"system":105},[],[],[],[],{"commands":107,"expected_files":108},[],[98],{"asset_kind":30,"target_tools":110,"install_mode":35,"entrypoint":98,"risk_profile":111,"dependencies":113,"content_hash":96,"verification":118},[32],{"executes_code":28,"modifies_global_config":28,"requires_secrets":112,"uses_absolute_paths":28,"network_access":59},[],{"npm":114,"pip":115,"brew":116,"system":117},[],[],[],[],{"commands":119,"expected_files":120},[],[98],{"target":33,"score":122,"status":123,"policy":124,"why":125,"asset_kind":30,"install_mode":35},64,"needs_confirmation","confirm",[126,66,67,127,128,129,71],"target_tools does not include codex","policy confirm","metadata target_tools does not include codex","risk_profile.network_access is true",{"author_trust_level":73,"verified_publisher":28,"asset_signed_hash":96,"signature_status":74,"install_count":12,"report_count":12,"dangerous_capability_badges":131,"review_status":76,"signals":133},[132],"network_access",[78,79],{"owner_uuid":9,"owner_name":10,"source_url":135,"content_hash":96,"visibility":18,"created_at":136,"updated_at":137},"https:\u002F\u002Ftokrepo.com\u002Fen\u002Fworkflows\u002Fasset-ad0db703","2026-05-04 20:27:54","2026-05-13 12:29:09",102.65627801746322,[140,141,142,143],"topic-match","same-kind","same-target","same-author",[26,145],"configs",{"id":147,"uuid":148,"slug":149,"title":150,"description":151,"author_id":9,"author_name":10,"author_avatar":11,"token_estimate":12,"time_saved":12,"model_used":13,"fork_count":12,"vote_count":12,"view_count":152,"parent_id":12,"parent_uuid":13,"lang_type":14,"steps":153,"tags":154,"has_voted":28,"visibility":18,"share_token":13,"is_featured":12,"content_hash":156,"asset_kind":30,"target_tools":157,"install_mode":35,"entrypoint":158,"risk_profile":159,"dependencies":161,"verification":166,"agent_metadata":169,"agent_fit":181,"trust":183,"provenance":186,"created_at":188,"updated_at":189,"__relatedScore":190,"__relatedReasons":191,"__sharedTags":192},3013,"dd1641d6-4d11-11f1-9bc6-00163e2b0d79","asset-dd1641d6","OpenScreen — Open-Source Screen Recording and Demo Creation Tool","Create polished screen recordings and product demos without subscriptions or watermarks, free for commercial use.",17,[],[155],{"id":24,"name":25,"slug":26,"icon":27},"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",[32,33,34],"SKILL.md",{"executes_code":28,"modifies_global_config":28,"requires_secrets":160,"uses_absolute_paths":28,"network_access":28},[],{"npm":162,"pip":163,"brew":164,"system":165},[],[],[],[],{"commands":167,"expected_files":168},[],[],{"asset_kind":30,"target_tools":170,"install_mode":35,"entrypoint":158,"risk_profile":171,"dependencies":173,"content_hash":156,"verification":178,"inferred":59},[32,33,34],{"executes_code":28,"modifies_global_config":28,"requires_secrets":172,"uses_absolute_paths":28,"network_access":28},[],{"npm":174,"pip":175,"brew":176,"system":177},[],[],[],[],{"commands":179,"expected_files":180},[],[],{"target":33,"score":61,"status":62,"policy":63,"why":182,"asset_kind":30,"install_mode":35},[65,66,67,68,69,70,71],{"author_trust_level":73,"verified_publisher":28,"asset_signed_hash":156,"signature_status":74,"install_count":12,"report_count":12,"dangerous_capability_badges":184,"review_status":76,"signals":185},[],[78,79,80],{"owner_uuid":9,"owner_name":10,"source_url":187,"content_hash":156,"visibility":18,"created_at":188,"updated_at":189},"https:\u002F\u002Ftokrepo.com\u002Fen\u002Fworkflows\u002Fasset-dd1641d6","2026-05-11 16:17:34","2026-05-13 14:15:04",81.88290875765496,[140,141,142,143],[26,145],{"id":194,"uuid":195,"slug":196,"title":197,"description":198,"author_id":9,"author_name":10,"author_avatar":11,"token_estimate":12,"time_saved":12,"model_used":13,"fork_count":12,"vote_count":12,"view_count":199,"parent_id":12,"parent_uuid":13,"lang_type":14,"steps":200,"tags":201,"has_voted":28,"visibility":18,"share_token":13,"is_featured":12,"content_hash":156,"asset_kind":30,"target_tools":203,"install_mode":35,"entrypoint":158,"risk_profile":204,"dependencies":206,"verification":211,"agent_metadata":214,"agent_fit":226,"trust":228,"provenance":231,"created_at":233,"updated_at":234,"__relatedScore":235,"__relatedReasons":236,"__sharedTags":237},3330,"52e79f5d-4e1f-11f1-9bc6-00163e2b0d79","asset-52e79f5d","wxWidgets — Cross-Platform C++ GUI Library with Native Look","Build desktop applications in C++ that look and feel native on Windows, macOS, and Linux using each platform's own widget toolkit rather than custom-drawn controls.",13,[],[202],{"id":24,"name":25,"slug":26,"icon":27},[32,33,34],{"executes_code":28,"modifies_global_config":28,"requires_secrets":205,"uses_absolute_paths":28,"network_access":28},[],{"npm":207,"pip":208,"brew":209,"system":210},[],[],[],[],{"commands":212,"expected_files":213},[],[],{"asset_kind":30,"target_tools":215,"install_mode":35,"entrypoint":158,"risk_profile":216,"dependencies":218,"content_hash":156,"verification":223,"inferred":59},[32,33,34],{"executes_code":28,"modifies_global_config":28,"requires_secrets":217,"uses_absolute_paths":28,"network_access":28},[],{"npm":219,"pip":220,"brew":221,"system":222},[],[],[],[],{"commands":224,"expected_files":225},[],[],{"target":33,"score":61,"status":62,"policy":63,"why":227,"asset_kind":30,"install_mode":35},[65,66,67,68,69,70,71],{"author_trust_level":73,"verified_publisher":28,"asset_signed_hash":156,"signature_status":74,"install_count":12,"report_count":12,"dangerous_capability_badges":229,"review_status":76,"signals":230},[],[78,79,80],{"owner_uuid":9,"owner_name":10,"source_url":232,"content_hash":156,"visibility":18,"created_at":233,"updated_at":234},"https:\u002F\u002Ftokrepo.com\u002Fen\u002Fworkflows\u002Fasset-52e79f5d","2026-05-13 00:26:26","2026-05-13 11:17:49",79.71919205351736,[140,141,142,143],[26,145],{"id":239,"uuid":240,"slug":241,"title":242,"description":243,"author_id":244,"author_name":245,"author_avatar":11,"token_estimate":12,"time_saved":12,"model_used":13,"fork_count":12,"vote_count":12,"view_count":246,"parent_id":12,"parent_uuid":13,"lang_type":14,"steps":247,"tags":248,"has_voted":28,"visibility":18,"share_token":13,"is_featured":12,"content_hash":250,"asset_kind":30,"target_tools":251,"install_mode":35,"entrypoint":242,"risk_profile":253,"dependencies":255,"verification":260,"agent_metadata":263,"agent_fit":275,"trust":278,"provenance":282,"created_at":284,"updated_at":285,"__relatedScore":286,"__relatedReasons":287,"__sharedTags":288},519,"6c6059a2-a15c-4160-a806-ea2e1e519993","spec-kit-spec-driven-dev-toolkit-github-85k-stars-6c6059a2","Spec Kit — Spec-Driven Dev Toolkit from GitHub, 85K Stars","GitHub's open-source spec-driven development toolkit. Turn product specs into executable AI agent workflows via the specify CLI instead of vibe coding.","81b6b4dc-2ab8-11f1-9bc6-00163e2b0d79","TokRepo精选",120,[],[249],{"id":24,"name":25,"slug":26,"icon":27},"a461af477e127a484ae65086ddbdf26cd4a5bfa4ffd6cd4a563c0e82e7d60139",[32,252,34],"cursor",{"executes_code":28,"modifies_global_config":28,"requires_secrets":254,"uses_absolute_paths":28,"network_access":28},[],{"npm":256,"pip":257,"brew":258,"system":259},[],[],[],[],{"commands":261,"expected_files":262},[],[242],{"asset_kind":30,"target_tools":264,"install_mode":35,"entrypoint":242,"risk_profile":265,"dependencies":267,"content_hash":250,"verification":272},[32,252,34],{"executes_code":28,"modifies_global_config":28,"requires_secrets":266,"uses_absolute_paths":28,"network_access":28},[],{"npm":268,"pip":269,"brew":270,"system":271},[],[],[],[],{"commands":273,"expected_files":274},[],[242],{"target":33,"score":276,"status":123,"policy":124,"why":277,"asset_kind":30,"install_mode":35},66,[126,66,67,68,127,128,71],{"author_trust_level":73,"verified_publisher":28,"asset_signed_hash":250,"signature_status":74,"install_count":12,"report_count":12,"dangerous_capability_badges":279,"review_status":76,"signals":280},[],[281,78,79,80],"asset has usage views",{"owner_uuid":244,"owner_name":245,"source_url":283,"content_hash":250,"visibility":18,"created_at":284,"updated_at":285},"https:\u002F\u002Ftokrepo.com\u002Fen\u002Fworkflows\u002Fspec-kit-spec-driven-dev-toolkit-github-85k-stars-6c6059a2","2026-04-06 15:16:38","2026-05-13 04:33:24",75.12417805547467,[140,141,142],[26,145]]