3 Commits

Author SHA1 Message Date
panda b0bf0fa9bc 优化连接不上时创建表 2025-11-05 09:50:55 +08:00
panda 4154eb452f 钉钉api 2025-10-30 17:24:28 +08:00
panda c5a5a0a99c 生成日报、周报 2025-10-30 09:54:47 +08:00
18 changed files with 3356 additions and 191 deletions
+459
View File
@@ -0,0 +1,459 @@
"""
报告生成器基类
提供数据源接口、AI处理接口等扩展能力
"""
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
import os
import sys
from loguru import logger
# 添加父目录到路径
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(os.path.dirname(current_dir))
if parent_dir not in sys.path:
sys.path.insert(0, parent_dir)
from utils.mysql_agent import MySQLAgent
from config import Config
class DataSource(ABC):
    """Abstract contract that every report data source has to fulfil.

    A concrete source is asked for the records of a time window and for a
    human-readable name that is attached to each record it produced.
    """

    @abstractmethod
    def get_source_name(self) -> str:
        """Return the display name of this data source."""

    @abstractmethod
    def fetch_data(self, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]:
        """Return the records that fall inside the requested time range.

        Args:
            start_time: Start of the time range.
            end_time: End of the time range.

        Returns:
            A list of dictionaries; each record is expected to carry fields
            such as title, link, summary and publish time.
        """
class RSSDataSource(DataSource):
    """Data source that reads collected RSS articles from a MySQL table.

    All database access goes through the injected ``db_agent``; the only
    method used here is ``query_to_df``.
    """

    def __init__(self, db_agent: MySQLAgent, table_name: str = "collector_rss_subscriptions"):
        """Remember the database agent and the table to read from.

        Args:
            db_agent: Agent used to run the query (``query_to_df``).
            table_name: Table that holds the collected RSS articles.
        """
        self.db_agent = db_agent
        self.table_name = table_name
        self.logger = logger.bind(module="RSSDataSource")

    def fetch_data(self, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]:
        """Load the RSS articles published in ``[start_time, end_time)``.

        Args:
            start_time: Inclusive lower bound of the publish time.
            end_time: Exclusive upper bound of the publish time.

        Returns:
            One dictionary per row with the keys ``title``, ``link``,
            ``summary``, ``publish_time``, ``source_url`` and ``create_time``,
            newest first. An empty list is returned when nothing matches or
            when the query fails (the failure is logged, not raised).
        """
        # Identifiers cannot be bound as query parameters, so the table name is
        # interpolated; the time bounds are always passed as parameters.
        # NOTE(review): table_name is assumed to come from trusted configuration.
        sql = f"""
            SELECT
                `文章标题` as title,
                `文章链接` as link,
                `文章摘要` as summary,
                `发布时间` as publish_time,
                `来源URL` as source_url,
                `创建时间` as create_time
            FROM `{self.table_name}`
            WHERE `发布时间` >= %s AND `发布时间` < %s
            ORDER BY `发布时间` DESC
        """
        params = (
            start_time.strftime('%Y-%m-%d %H:%M:%S'),
            end_time.strftime('%Y-%m-%d %H:%M:%S'),
        )
        try:
            df = self.db_agent.query_to_df(sql, params=params, is_print=False)
            if df is None or df.empty:
                self.logger.info(f"时间范围 {start_time} 到 {end_time} 内没有RSS数据")
                return []
            # Convert the frame into a list of per-row dictionaries.
            data_list = df.to_dict('records')
            self.logger.info(f"获取到 {len(data_list)} 条RSS数据")
            return data_list
        except Exception as exc:
            # loguru does not understand the stdlib ``exc_info`` flag: passing it
            # only adds an extra field and makes loguru run str.format() on the
            # message. ``exception()`` records the traceback, and the error text
            # is passed as a format argument so braces in it are harmless.
            self.logger.exception("获取RSS数据失败: {}", exc)
            return []

    def get_source_name(self) -> str:
        """Return the display name of this data source."""
        return "RSS订阅"
class AIAnalysisDataSource(DataSource):
    """Data source that reads already-classified articles from MySQL.

    Only rows flagged as relevant (``是否相关 = 1``) in the AI analysis result
    table are returned.
    """

    def __init__(self, db_agent: MySQLAgent, table_name: str = "ai_processor_rss_analysis"):
        """Remember the database agent and the table to read from.

        Args:
            db_agent: Agent used to run the query (``query_to_df``).
            table_name: Table that holds the AI analysis results.
        """
        self.db_agent = db_agent
        self.table_name = table_name
        self.logger = logger.bind(module="AIAnalysisDataSource")

    def fetch_data(self, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]:
        """Load the relevant articles published in ``[start_time, end_time)``.

        Args:
            start_time: Inclusive lower bound of the publish time.
            end_time: Exclusive upper bound of the publish time.

        Returns:
            One dictionary per row with the keys ``title``, ``link``,
            ``summary``, ``publish_time``, ``source_url``, ``category``,
            ``tags``, ``relevance_score``, ``analysis_note`` and
            ``process_time``, ordered by publish time and then relevance score
            (both descending). An empty list is returned when nothing matches
            or when the query fails (the failure is logged, not raised).
        """
        # Identifiers cannot be bound as query parameters, so the table name is
        # interpolated; the time bounds are always passed as parameters.
        # NOTE(review): table_name is assumed to come from trusted configuration.
        sql = f"""
            SELECT
                `文章标题` as title,
                `文章链接` as link,
                `文章摘要` as summary,
                `发布时间` as publish_time,
                `来源URL` as source_url,
                `分类` as category,
                `标签` as tags,
                `相关度评分` as relevance_score,
                `分析说明` as analysis_note,
                `处理时间` as process_time
            FROM `{self.table_name}`
            WHERE `发布时间` >= %s AND `发布时间` < %s
            AND `是否相关` = 1
            ORDER BY `发布时间` DESC, `相关度评分` DESC
        """
        params = (
            start_time.strftime('%Y-%m-%d %H:%M:%S'),
            end_time.strftime('%Y-%m-%d %H:%M:%S'),
        )
        try:
            df = self.db_agent.query_to_df(sql, params=params, is_print=False)
            if df is None or df.empty:
                self.logger.info(f"时间范围 {start_time} 到 {end_time} 内没有相关数据(是否相关=1)")
                return []
            # Convert the frame into a list of per-row dictionaries.
            data_list = df.to_dict('records')
            self.logger.info(f"获取到 {len(data_list)} 条相关数据(是否相关=1)")
            return data_list
        except Exception as exc:
            # loguru does not understand the stdlib ``exc_info`` flag: passing it
            # only adds an extra field and makes loguru run str.format() on the
            # message. ``exception()`` records the traceback, and the error text
            # is passed as a format argument so braces in it are harmless.
            self.logger.exception("获取AI分析数据失败: {}", exc)
            return []

    def get_source_name(self) -> str:
        """Return the display name of this data source."""
        return "AI分析结果"
class AIProcessor:
    """LLM-backed helper that filters articles and renders a news summary.

    The chat-completions client is created against the Qianfan endpoint with
    the API key and model taken from the arguments or from
    ``Config.BAIDU_AI_CONFIG``.
    """

    def __init__(self, api_key: Optional[str] = None, model: Optional[str] = None):
        """Create the chat client.

        Args:
            api_key: API key; falls back to ``Config.BAIDU_AI_CONFIG['api_key']``.
            model: Model name; falls back to ``Config.BAIDU_AI_CONFIG['model']``
                and finally to ``'ernie-x1-turbo-32k'``.
        """
        # Imported here so that importing this module does not require ``openai``.
        from openai import OpenAI
        self.base_url = 'https://qianfan.baidubce.com/v2'
        self.api_key = api_key or Config.BAIDU_AI_CONFIG.get('api_key')
        self.model = model or Config.BAIDU_AI_CONFIG.get('model', 'ernie-x1-turbo-32k')
        self.client = OpenAI(
            base_url=self.base_url,
            api_key=self.api_key
        )
        self.logger = logger.bind(module="AIProcessor")

    @staticmethod
    def _build_filter_prompt(batch: List[Dict[str, Any]]) -> str:
        """Build the classification prompt for one batch of articles.

        Articles are numbered from 0 *within the batch*, which is the numbering
        the prompt asks the model to echo back and the numbering used to map
        the answer onto ``batch``.
        """
        articles_text = "".join(
            f"\n[{idx}] 标题: {article.get('title', '')}\n"
            f"摘要: {article.get('summary', '')}\n"
            for idx, article in enumerate(batch)
        )
        return f"""请分析以下新闻文章,判断哪些与汽车后市场相关。
汽车后市场的定义:汽车销售以后,围绕汽车使用过程中的各种服务,包括:
- 汽车维修保养
- 汽车配件
- 汽车改装
- 汽车美容
- 汽车用品
- 汽车金融
- 汽车保险
- 二手车交易
- 汽车租赁
- 汽车检测
- 汽车报废回收
- 汽车相关法律法规和政策
文章列表:
{articles_text}
请按以下JSON格式返回结果:
{{
"related_articles": [
{{
"index": 文章的序号(从0开始),
"is_related": true/false,
"reason": "判断理由",
"category": "所属类别(如:维修保养、配件、政策等)"
}}
]
}}
只返回JSON,不要其他文字说明。"""

    @staticmethod
    def _parse_json_response(result_text: str) -> Optional[Dict[str, Any]]:
        """Extract the JSON object from a model reply, or return ``None``.

        Tries, in order: a fenced markdown code block, the outermost ``{...}``
        span, and finally the whole reply.
        """
        import json
        import re

        candidates = []
        fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', result_text, re.DOTALL)
        if fenced:
            candidates.append(fenced.group(1))
        braced = re.search(r'\{.*\}', result_text, re.DOTALL)
        if braced:
            candidates.append(braced.group())
        candidates.append(result_text)

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError
                continue
            if isinstance(parsed, dict):
                return parsed
        return None

    def filter_automotive_content(self, articles: List[Dict[str, Any]],
                                  batch_size: int = 10,
                                  delay: float = 1.5) -> List[Dict[str, Any]]:
        """Keep only the articles the model judges relevant to the aftermarket.

        Articles are sent to the model in batches. Matching articles are
        annotated in place with ``ai_marked=True``, ``ai_category`` and
        ``ai_reason`` and collected in the result. When a batch fails, its
        articles are annotated with ``ai_marked=False`` and ``ai_error`` but
        are not part of the result.

        Args:
            articles: Articles to classify (``title`` and ``summary`` are read).
            batch_size: Number of articles per request (at least 1).
            delay: Seconds to wait between requests to avoid API rate limits.

        Returns:
            The articles judged relevant, in input order.
        """
        if not articles:
            return []

        import time

        batch_size = max(1, int(batch_size))
        self.logger.info(f"开始AI筛选 {len(articles)} 篇文章")
        filtered_articles = []

        for start in range(0, len(articles), batch_size):
            batch = articles[start:start + batch_size]
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[{
                        "role": "user",
                        "content": self._build_filter_prompt(batch)
                    }]
                )
                # ``content`` may be None; treat that like an empty reply.
                result_text = (response.choices[0].message.content or "").strip()

                result_json = self._parse_json_response(result_text)
                if result_json is None:
                    self.logger.warning(f"无法解析AI返回的JSON: {result_text[:200]}")
                    result_json = {}

                for item in result_json.get('related_articles') or []:
                    if not isinstance(item, dict):
                        continue
                    try:
                        # Models sometimes return the index as a string.
                        idx = int(item.get('index', -1))
                    except (TypeError, ValueError):
                        continue
                    # The prompt numbers articles per batch, so the index is
                    # valid only inside this batch.
                    if not 0 <= idx < len(batch):
                        continue
                    if item.get('is_related', False):
                        article = batch[idx]
                        article['ai_marked'] = True
                        article['ai_category'] = item.get('category', '其他')
                        article['ai_reason'] = item.get('reason', '')
                        filtered_articles.append(article)
            except Exception as exc:
                # loguru ignores the stdlib ``exc_info`` flag; opt(exception=True)
                # attaches the traceback, and the error text is a format
                # argument so braces in it cannot break the log call.
                self.logger.opt(exception=True).error("AI筛选批处理失败: {}", exc)
                # Tag the batch as unprocessed; these articles are not returned.
                for article in batch:
                    article['ai_marked'] = False
                    article['ai_error'] = str(exc)

            # Pause between requests (also after a failure) to avoid rate limits.
            if delay > 0 and start + batch_size < len(articles):
                time.sleep(delay)

        self.logger.info(f"AI筛选完成,找到 {len(filtered_articles)} 篇相关文章")
        return filtered_articles

    def generate_news_summary(self, articles: List[Dict[str, Any]]) -> str:
        """Render the filtered articles as a Markdown news section.

        Args:
            articles: Articles returned by ``filter_automotive_content``.

        Returns:
            Markdown text; a short placeholder section when ``articles`` is empty.
        """
        if not articles:
            return "## 相关新闻\n\n暂无相关新闻。\n"

        sections = []
        for idx, article in enumerate(articles, 1):
            link = article.get('link', '')
            lines = [
                f"\n### {idx}. {article.get('title', '无标题')}",
                f"- **类别**: {article.get('ai_category', '其他')}",
                f"- **摘要**: {article.get('summary', '无摘要')}",
                f"- **链接**: [{link}]({link})",
                f"- **发布时间**: {article.get('publish_time', '')}",
            ]
            reason = article.get('ai_reason', '')
            if reason:
                lines.append(f"- **相关性说明**: {reason}")
            sections.append("\n".join(lines) + "\n\n")

        articles_text = "".join(sections)
        return f"## 汽车后市场相关新闻\n\n共找到 {len(articles)} 篇相关新闻:\n\n{articles_text}"
class BaseReporter:
    """Base class for report generators.

    It collects records from the registered data sources, renders them as
    Markdown, converts the Markdown to HTML and writes the results to disk.
    """

    def __init__(self, data_sources: Optional[List["DataSource"]] = None):
        """Set up the reporter.

        Args:
            data_sources: Initial data sources; more can be added later with
                ``add_data_source``.
        """
        # Copy the list so add_data_source() never mutates the caller's list.
        self.data_sources = list(data_sources) if data_sources else []
        self.ai_processor = AIProcessor()
        self.logger = logger.bind(module="BaseReporter")

    def add_data_source(self, data_source: "DataSource"):
        """Register an additional data source."""
        self.data_sources.append(data_source)
        self.logger.info(f"添加数据源: {data_source.get_source_name()}")

    @staticmethod
    def _is_blank(value: Any) -> bool:
        """Return True for None, NaN/NaT-like values and empty strings.

        Rows converted from a DataFrame contain every column, so a missing
        database value shows up as ``None`` or NaN rather than as an absent key.
        """
        if value is None:
            return True
        try:
            if value != value:  # NaN-like values are not equal to themselves
                return True
        except (TypeError, ValueError):
            return False
        return isinstance(value, str) and not value.strip()

    @classmethod
    def _text_or_default(cls, value: Any, default: Any) -> Any:
        """Return ``value`` unless it is blank, in which case return ``default``."""
        return default if cls._is_blank(value) else value

    @classmethod
    def _publish_time_sort_key(cls, item: Dict[str, Any]):
        """Sort key that never raises on mixed or missing publish times.

        Records without a publish time sort below every dated record. Dated
        values are compared by their string form, which is chronological for
        ``YYYY-MM-DD HH:MM:SS``-style values.
        """
        value = item.get('publish_time')
        if cls._is_blank(value):
            return (False, '')
        return (True, str(value))

    def collect_data(self, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]:
        """Collect the records of all data sources for the given time range.

        Every record is tagged with ``data_source`` (the source's name). A
        failing source is logged and skipped.

        Returns:
            All records, newest publish time first; records without a publish
            time come last.
        """
        all_data: List[Dict[str, Any]] = []
        for source in self.data_sources:
            try:
                data = source.fetch_data(start_time, end_time)
                source_name = source.get_source_name()
                # Tag each record with the source it came from.
                for item in data:
                    item['data_source'] = source_name
                all_data.extend(data)
            except Exception as e:
                self.logger.error(f"{source.get_source_name()} 收集数据失败: {str(e)}")
        # A plain ``x.get('publish_time', '')`` key raises TypeError as soon as a
        # datetime value meets the '' default or a string value.
        all_data.sort(key=self._publish_time_sort_key, reverse=True)
        return all_data

    def generate_report_content(self, articles: List[Dict[str, Any]], report_type: str = "日报") -> str:
        """Build the Markdown body of a report.

        Args:
            articles: Articles to report; they are used as-is, no further
                filtering is applied here.
            report_type: ``"日报"`` or ``"周报"``; only used to pick the message
                shown when there are no articles.

        Returns:
            Markdown text with a statistics section followed by the news section.
        """
        related_articles = articles
        related_count = len(related_articles)

        # Blank lines keep the Markdown block elements separated.
        if related_count == 0:
            if report_type == "日报":
                message = "昨日无汽车后市场相关的新闻"
            else:
                message = "上周无汽车后市场相关的新闻"
            return f"""
## 数据统计

- **相关文章数**: 0

## 相关新闻

{message}
"""

        news_summary = self._generate_news_summary_from_analysis(related_articles)
        stats = f"""
## 数据统计

- **相关文章数**: {related_count}

"""
        return stats + news_summary

    def _generate_news_summary_from_analysis(self, articles: List[Dict[str, Any]]) -> str:
        """Render analysed articles as a Markdown news section.

        Args:
            articles: Records carrying ``title``, ``summary``, ``link``,
                ``publish_time`` and optionally ``category``, ``tags``,
                ``relevance_score`` and ``analysis_note``.

        Returns:
            Markdown text; blank values fall back to placeholders or are
            omitted instead of being printed as ``None``/``nan``.
        """
        if not articles:
            return "## 相关新闻\n\n暂无相关新闻。\n"

        sections = []
        for idx, article in enumerate(articles, 1):
            title = self._text_or_default(article.get('title'), '无标题')
            category = self._text_or_default(article.get('category'), '其他')
            summary = self._text_or_default(article.get('summary'), '无摘要')
            link = self._text_or_default(article.get('link'), '')
            publish_time = self._text_or_default(article.get('publish_time'), '')
            tags = article.get('tags')
            relevance_score = article.get('relevance_score')
            analysis_note = article.get('analysis_note')

            lines = [f"\n### {idx}. {title}", f"- **分类**: {category}"]
            if not self._is_blank(tags):
                lines.append(f"- **标签**: {tags}")
            lines.append(f"- **摘要**: {summary}")
            lines.append(f"- **链接**: [{link}]({link})")
            lines.append(f"- **发布时间**: {publish_time}")
            # A zero score stays hidden, as before; NaN is hidden as well.
            if relevance_score and not self._is_blank(relevance_score):
                lines.append(f"- **相关度评分**: {relevance_score}")
            if not self._is_blank(analysis_note):
                lines.append(f"- **分析说明**: {analysis_note}")
            sections.append("\n".join(lines) + "\n\n")

        articles_text = "".join(sections)
        return f"## 汽车后市场相关新闻\n\n共找到 {len(articles)} 篇相关新闻:\n\n{articles_text}"

    def generate_html_report(self, markdown_content: str, template_path: Optional[str] = None) -> str:
        """Convert Markdown to a full HTML document.

        Args:
            markdown_content: Report body in Markdown.
            template_path: Optional external HTML template; the built-in
                template is used when it is missing or does not exist.
        """
        # Imported lazily (relative import) to avoid a circular dependency.
        from .html_template import HTMLTemplateManager

        template_manager = HTMLTemplateManager()
        if template_path and os.path.exists(template_path):
            return template_manager.render_external_template(template_path, markdown_content)
        return template_manager.render_builtin_template(markdown_content)

    @staticmethod
    def _write_text_file(content: str, output_path: str) -> None:
        """Write ``content`` to ``output_path`` as UTF-8, creating parent directories."""
        os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(content)

    def save_report(self, html_content: str, output_path: str):
        """Save the HTML report to ``output_path``."""
        self._write_text_file(html_content, output_path)
        self.logger.info(f"HTML报告已保存到: {output_path}")

    def save_markdown_report(self, markdown_content: str, output_path: str):
        """Save the Markdown report to ``output_path``."""
        self._write_text_file(markdown_content, output_path)
        self.logger.info(f"Markdown报告已保存到: {output_path}")
+139
View File
@@ -0,0 +1,139 @@
"""
日报生成器 - 生成24小时内的汽车后市场情报报告
"""
import os
import sys
from datetime import datetime, timedelta
from loguru import logger
# 添加父目录到路径
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(os.path.dirname(current_dir))
if parent_dir not in sys.path:
sys.path.insert(0, parent_dir)
from applications.reporter.base_reporter import BaseReporter, AIAnalysisDataSource
from applications.reporter.dingtalk_webhook import DingTalkWebhook
from utils.mysql_agent import MySQLAgent
from config import Config
class DailyReporter(BaseReporter):
    """Daily report generator covering the last 24 hours."""

    def __init__(self, dingtalk_webhook: str = None):
        """Set up the data source and the optional DingTalk push.

        Args:
            dingtalk_webhook: DingTalk webhook URL; falls back to
                ``Config.DINGTALK_WEBHOOK`` when omitted. Pushing is disabled
                when neither is set.
        """
        super().__init__()
        # Database connection used by the data source.
        db_agent = MySQLAgent(Config.MYSQL_CONFIG)
        # The AI analysis source only returns rows already flagged as relevant.
        self.add_data_source(AIAnalysisDataSource(db_agent))
        self.logger = logger.bind(module="DailyReporter")
        # Enable the DingTalk push only when a webhook is configured.
        self.dingtalk_webhook = dingtalk_webhook or getattr(Config, 'DINGTALK_WEBHOOK', None)
        self.dingtalk_client = None
        if self.dingtalk_webhook:
            self.dingtalk_client = DingTalkWebhook(self.dingtalk_webhook)
            self.logger.info("已启用钉钉推送功能")

    def generate(self, output_dir: str = "output/reports/daily",
                 template_path: str = None,
                 save_markdown: bool = True,
                 send_dingtalk: bool = True) -> dict:
        """Generate the daily report.

        Args:
            output_dir: Directory the report files are written to.
            template_path: Optional external HTML template.
            save_markdown: Whether to also save the Markdown source.
            send_dingtalk: Whether to push the report to DingTalk (only
                effective when a webhook is configured).

        Returns:
            Dictionary with ``html_path`` and, when applicable,
            ``markdown_path`` and ``dingtalk_sent``.
        """
        self.logger.info("开始生成日报")

        # Time range: the last 24 hours.
        end_time = datetime.now()
        start_time = end_time - timedelta(hours=24)
        start_text = start_time.strftime('%Y-%m-%d %H:%M:%S')
        end_text = end_time.strftime('%Y-%m-%d %H:%M:%S')
        self.logger.info(f"时间范围: {start_text} 至 {end_text}")

        articles = self.collect_data(start_time, end_time)

        # generate_report_content() also covers the "no articles" case.
        # Blank lines keep the Markdown block elements separated.
        markdown_content = f"""# 汽车后市场情报日报

## 报告时间

**生成时间**: {end_text}

**时间范围**: {start_text} 至 {end_text}

{self.generate_report_content(articles, report_type="日报")}
"""
        html_content = self.generate_html_report(markdown_content, template_path=template_path)

        os.makedirs(output_dir, exist_ok=True)
        timestamp = end_time.strftime('%Y%m%d_%H%M%S')
        result = {}

        # HTML report.
        html_path = os.path.join(output_dir, f"daily_report_{timestamp}.html")
        self.save_report(html_content, html_path)
        result['html_path'] = html_path
        self.logger.info(f"HTML报告已保存: {html_path}")

        # Markdown report (optional).
        markdown_path = None
        if save_markdown:
            markdown_path = os.path.join(output_dir, f"daily_report_{timestamp}.md")
            self.save_markdown_report(markdown_content, markdown_path)
            result['markdown_path'] = markdown_path
            self.logger.info(f"Markdown报告已保存: {markdown_path}")

        # DingTalk push (optional).
        if send_dingtalk and self.dingtalk_client:
            title = f"汽车后市场情报日报 - {end_time.strftime('%Y-%m-%d')}"
            success = self.dingtalk_client.send_report(title, markdown_content, markdown_path)
            result['dingtalk_sent'] = success
            if success:
                self.logger.info("报告已推送到钉钉群")
            else:
                self.logger.warning("报告推送到钉钉群失败")

        self.logger.info("日报生成完成")
        return result
def main():
    """Generate the daily report and print where the results were written.

    Raises:
        Exception: Any error raised while generating the report is logged with
            its traceback and re-raised.
    """
    try:
        reporter = DailyReporter()
        result = reporter.generate()
    except Exception as e:
        # loguru ignores the stdlib ``exc_info`` flag; ``exception()`` records
        # the traceback, and the error text is passed as a format argument.
        logger.exception("生成日报失败: {}", e)
        raise

    print("日报已生成:")
    print(f" HTML: {result.get('html_path')}")
    if 'markdown_path' in result:
        print(f" Markdown: {result.get('markdown_path')}")
    if 'dingtalk_sent' in result:
        print(f" 钉钉推送: {'成功' if result.get('dingtalk_sent') else '失败'}")


if __name__ == "__main__":
    main()
+399
View File
@@ -0,0 +1,399 @@
"""
HTML模板管理器
支持内置模板和外部HTML模板
"""
import os
import markdown
from bs4 import BeautifulSoup
import re
from typing import Optional
from loguru import logger
class HTMLTemplateManager:
    """Renders Markdown reports into HTML with a built-in or external template."""

    # Placeholder patterns recognised in external templates, in priority order.
    _PLACEHOLDER_PATTERNS = (
        r'\{\{content\}\}',
        r'\{content\}',
        r'<!--\s*content\s*-->',
    )

    def __init__(self):
        self.logger = logger.bind(module="HTMLTemplateManager")

    def markdown_to_html(self, markdown_content: str) -> str:
        """Convert Markdown to an HTML fragment."""
        html = markdown.markdown(
            markdown_content,
            extensions=['tables', 'fenced_code', 'codehilite']
        )
        return html

    def render_builtin_template(self, markdown_content: str) -> str:
        """Render the Markdown into the built-in, self-contained HTML page."""
        html_body = self.markdown_to_html(markdown_content)
        # Post-process the fragment (table/list classes, link targets).
        soup = BeautifulSoup(html_body, 'html.parser')
        self._enhance_html_structure(soup)
        # Assemble the full document; CSS braces are doubled for the f-string.
        html_template = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>汽车后市场情报报告</title>
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@300;400;500;700&display=swap" rel="stylesheet">
<style>
:root {{
--primary: #3498db;
--secondary: #2ecc71;
--accent: #e74c3c;
--dark: #2c3e50;
--light: #f8f9fa;
--border: #e0e0e0;
}}
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
body {{
font-family: 'Noto Sans SC', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.8;
color: #333;
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
padding: 20px;
}}
.report-container {{
max-width: 1200px;
margin: 0 auto;
padding: 40px;
background: white;
box-shadow: 0 10px 40px rgba(0,0,0,0.1);
border-radius: 12px;
}}
.report-header {{
text-align: center;
padding-bottom: 30px;
border-bottom: 3px solid var(--primary);
margin-bottom: 40px;
}}
.report-header h1 {{
color: var(--dark);
font-size: 2.5em;
margin-bottom: 10px;
}}
.report-header .report-date {{
color: #666;
font-size: 1.1em;
}}
h1 {{
color: var(--dark);
font-size: 2em;
margin: 30px 0 20px 0;
padding-bottom: 10px;
border-bottom: 2px solid var(--primary);
}}
h2 {{
color: var(--dark);
font-size: 1.6em;
margin: 25px 0 15px 0;
padding-left: 10px;
border-left: 4px solid var(--primary);
}}
h3 {{
color: var(--dark);
font-size: 1.3em;
margin: 20px 0 10px 0;
}}
h4 {{
color: #555;
font-size: 1.1em;
margin: 15px 0 8px 0;
}}
p {{
margin: 12px 0;
text-align: justify;
}}
ul, ol {{
margin: 15px 0;
padding-left: 30px;
}}
li {{
margin: 8px 0;
}}
/* 表格样式 */
table {{
width: 100%;
border-collapse: collapse;
margin: 25px 0;
box-shadow: 0 2px 15px rgba(0,0,0,0.1);
border-radius: 8px;
overflow: hidden;
}}
table thead {{
background: linear-gradient(135deg, var(--primary) 0%, #2980b9 100%);
color: white;
}}
table th {{
padding: 15px;
text-align: left;
font-weight: 600;
}}
table td {{
padding: 12px 15px;
border-bottom: 1px solid var(--border);
}}
table tbody tr:hover {{
background-color: #f5f5f5;
}}
table tbody tr:last-child td {{
border-bottom: none;
}}
/* 代码块样式 */
pre {{
background: #f4f4f4;
border: 1px solid var(--border);
border-radius: 6px;
padding: 15px;
overflow-x: auto;
margin: 20px 0;
}}
code {{
background: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}}
pre code {{
background: none;
padding: 0;
}}
/* 链接样式 */
a {{
color: var(--primary);
text-decoration: none;
border-bottom: 1px dotted var(--primary);
transition: all 0.3s;
}}
a:hover {{
color: var(--accent);
border-bottom-color: var(--accent);
}}
/* 新闻列表样式 */
.news-item {{
background: #f9f9f9;
border-left: 4px solid var(--secondary);
padding: 15px 20px;
margin: 15px 0;
border-radius: 6px;
transition: all 0.3s;
}}
.news-item:hover {{
background: #f0f0f0;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}}
.news-item h3 {{
margin-top: 0;
color: var(--dark);
}}
.news-item .news-meta {{
color: #666;
font-size: 0.9em;
margin-top: 10px;
}}
.news-item .news-category {{
display: inline-block;
background: var(--secondary);
color: white;
padding: 3px 10px;
border-radius: 12px;
font-size: 0.85em;
margin-right: 10px;
}}
/* 统计信息样式 */
.stats-box {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 25px;
border-radius: 10px;
margin: 25px 0;
}}
.stats-box h2 {{
color: white;
border: none;
padding: 0;
margin: 0 0 15px 0;
}}
.stats-grid {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-top: 20px;
}}
.stat-item {{
text-align: center;
}}
.stat-number {{
font-size: 2.5em;
font-weight: bold;
margin-bottom: 5px;
}}
.stat-label {{
font-size: 0.9em;
opacity: 0.9;
}}
/* 响应式设计 */
@media (max-width: 768px) {{
.report-container {{
padding: 20px;
}}
.report-header h1 {{
font-size: 1.8em;
}}
h1 {{
font-size: 1.6em;
}}
h2 {{
font-size: 1.3em;
}}
table {{
font-size: 0.9em;
}}
table th,
table td {{
padding: 8px;
}}
}}
/* 打印样式 */
@media print {{
body {{
background: white;
padding: 0;
}}
.report-container {{
box-shadow: none;
padding: 0;
}}
}}
</style>
</head>
<body>
<div class="report-container">
{str(soup)}
</div>
</body>
</html>"""
        return html_template

    def render_external_template(self, template_path: str, markdown_content: str) -> str:
        """Render the Markdown into an external HTML template.

        The first placeholder form found in the template (``{{content}}``,
        ``{content}`` or ``<!-- content -->``, case-insensitive) is replaced
        everywhere it occurs. Without a placeholder the content is appended to
        ``<body>``, or to the end of the template when there is no ``<body>``.

        Args:
            template_path: Path of the external template file.
            markdown_content: Report body in Markdown.

        Returns:
            The rendered HTML. On any error the built-in template is used
            instead and the error is logged.
        """
        try:
            with open(template_path, 'r', encoding='utf-8') as f:
                template = f.read()
            html_body = self.markdown_to_html(markdown_content)

            replaced = False
            for pattern in self._PLACEHOLDER_PATTERNS:
                if re.search(pattern, template, re.IGNORECASE):
                    # A callable replacement inserts the HTML literally; a plain
                    # string would have backslashes and group references
                    # (``\1``, ``\g<..>``) in the content interpreted by re.sub.
                    template = re.sub(pattern, lambda _match: html_body, template,
                                      flags=re.IGNORECASE)
                    replaced = True
                    break

            if not replaced:
                soup = BeautifulSoup(template, 'html.parser')
                body = soup.find('body')
                if body is not None:
                    body.append(BeautifulSoup(html_body, 'html.parser'))
                    template = str(soup)
                else:
                    # No <body>: append to the raw template and keep the result
                    # (re-serialising the soup here would drop the content).
                    template += html_body

            self.logger.info(f"使用外部模板渲染: {template_path}")
            return template
        except Exception as exc:
            # loguru ignores the stdlib ``exc_info`` flag; opt(exception=True)
            # attaches the traceback, and the error text is a format argument.
            self.logger.opt(exception=True).error("使用外部模板失败: {},回退到内置模板", exc)
            return self.render_builtin_template(markdown_content)

    def _enhance_html_structure(self, soup: "BeautifulSoup"):
        """Add CSS classes and safe link attributes to the parsed fragment in place."""
        # Tables without a class get the default table class.
        for table in soup.find_all('table'):
            if not table.get('class'):
                table['class'] = 'data-table'
        # Lists whose items mention news are marked as news lists.
        for ul in soup.find_all('ul'):
            if any('新闻' in str(item) for item in ul.find_all('li')):
                ul['class'] = 'news-list'
        # Links without an explicit target open in a new tab.
        for a in soup.find_all('a'):
            if not a.get('target'):
                a['target'] = '_blank'
                a['rel'] = 'noopener noreferrer'
@@ -0,0 +1,50 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>自定义报告模板示例</title>
<style>
/* Example of custom styles */
body {
font-family: 'Microsoft YaHei', Arial, sans-serif;
background: #f0f2f5;
padding: 20px;
margin: 0;
}
.container {
max-width: 1200px;
margin: 0 auto;
background: white;
padding: 30px;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
/* Styles for the content area */
#content {
line-height: 1.8;
}
h1 {
color: #1890ff;
border-bottom: 2px solid #1890ff;
padding-bottom: 10px;
}
h2 {
color: #333;
margin-top: 30px;
}
</style>
</head>
<body>
<div class="container">
<!-- Placeholder: the rendered report replaces the marker inside the content div. -->
<!-- Supported markers: the word content in double braces, in single braces, or alone inside an HTML comment. Use exactly one marker and never spell one out inside a comment, because every literal occurrence is replaced and HTML comments cannot be nested. -->
<div id="content">
{{content}}
</div>
</div>
</body>
</html>
+139
View File
@@ -0,0 +1,139 @@
"""
周报生成器 - 生成7天内的汽车后市场情报报告
"""
import os
import sys
from datetime import datetime, timedelta
from loguru import logger
# 添加父目录到路径
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(os.path.dirname(current_dir))
if parent_dir not in sys.path:
sys.path.insert(0, parent_dir)
from applications.reporter.base_reporter import BaseReporter, AIAnalysisDataSource
from applications.reporter.dingtalk_webhook import DingTalkWebhook
from utils.mysql_agent import MySQLAgent
from config import Config
class WeeklyReporter(BaseReporter):
    """Weekly report generator covering the last 7 days."""

    def __init__(self, dingtalk_webhook: str = None):
        """Set up the data source and the optional DingTalk push.

        Args:
            dingtalk_webhook: DingTalk webhook URL; falls back to
                ``Config.DINGTALK_WEBHOOK`` when omitted. Pushing is disabled
                when neither is set.
        """
        super().__init__()
        # Database connection used by the data source.
        db_agent = MySQLAgent(Config.MYSQL_CONFIG)
        # The AI analysis source only returns rows already flagged as relevant.
        self.add_data_source(AIAnalysisDataSource(db_agent))
        self.logger = logger.bind(module="WeeklyReporter")
        # Enable the DingTalk push only when a webhook is configured.
        self.dingtalk_webhook = dingtalk_webhook or getattr(Config, 'DINGTALK_WEBHOOK', None)
        self.dingtalk_client = None
        if self.dingtalk_webhook:
            self.dingtalk_client = DingTalkWebhook(self.dingtalk_webhook)
            self.logger.info("已启用钉钉推送功能")

    def generate(self, output_dir: str = "output/reports/weekly",
                 template_path: str = None,
                 save_markdown: bool = True,
                 send_dingtalk: bool = True) -> dict:
        """Generate the weekly report.

        Args:
            output_dir: Directory the report files are written to.
            template_path: Optional external HTML template.
            save_markdown: Whether to also save the Markdown source.
            send_dingtalk: Whether to push the report to DingTalk (only
                effective when a webhook is configured).

        Returns:
            Dictionary with ``html_path`` and, when applicable,
            ``markdown_path`` and ``dingtalk_sent``.
        """
        self.logger.info("开始生成周报")

        # Time range: the last 7 days.
        end_time = datetime.now()
        start_time = end_time - timedelta(days=7)
        start_text = start_time.strftime('%Y-%m-%d %H:%M:%S')
        end_text = end_time.strftime('%Y-%m-%d %H:%M:%S')
        self.logger.info(f"时间范围: {start_text} 至 {end_text}")

        articles = self.collect_data(start_time, end_time)

        # generate_report_content() also covers the "no articles" case.
        # Blank lines keep the Markdown block elements separated.
        markdown_content = f"""# 汽车后市场情报周报

## 报告时间

**生成时间**: {end_text}

**时间范围**: {start_text} 至 {end_text}

{self.generate_report_content(articles, report_type="周报")}
"""
        html_content = self.generate_html_report(markdown_content, template_path=template_path)

        os.makedirs(output_dir, exist_ok=True)
        timestamp = end_time.strftime('%Y%m%d_%H%M%S')
        result = {}

        # HTML report.
        html_path = os.path.join(output_dir, f"weekly_report_{timestamp}.html")
        self.save_report(html_content, html_path)
        result['html_path'] = html_path
        self.logger.info(f"HTML报告已保存: {html_path}")

        # Markdown report (optional).
        markdown_path = None
        if save_markdown:
            markdown_path = os.path.join(output_dir, f"weekly_report_{timestamp}.md")
            self.save_markdown_report(markdown_content, markdown_path)
            result['markdown_path'] = markdown_path
            self.logger.info(f"Markdown报告已保存: {markdown_path}")

        # DingTalk push (optional).
        if send_dingtalk and self.dingtalk_client:
            title = (
                f"汽车后市场情报周报 - {start_time.strftime('%Y-%m-%d')} 至 "
                f"{end_time.strftime('%Y-%m-%d')}"
            )
            success = self.dingtalk_client.send_report(title, markdown_content, markdown_path)
            result['dingtalk_sent'] = success
            if success:
                self.logger.info("报告已推送到钉钉群")
            else:
                self.logger.warning("报告推送到钉钉群失败")

        self.logger.info("周报生成完成")
        return result
def main():
    """Generate the weekly report and print where the results were written.

    Raises:
        Exception: Any error raised while generating the report is logged with
            its traceback and re-raised.
    """
    try:
        reporter = WeeklyReporter()
        result = reporter.generate()
    except Exception as e:
        # loguru ignores the stdlib ``exc_info`` flag; ``exception()`` records
        # the traceback, and the error text is passed as a format argument.
        logger.exception("生成周报失败: {}", e)
        raise

    print("周报已生成:")
    print(f" HTML: {result.get('html_path')}")
    if 'markdown_path' in result:
        print(f" Markdown: {result.get('markdown_path')}")
    if 'dingtalk_sent' in result:
        print(f" 钉钉推送: {'成功' if result.get('dingtalk_sent') else '失败'}")


if __name__ == "__main__":
    main()
+7 -1
View File
@@ -41,4 +41,10 @@ class Config:
'delay': 1.5, # 每条记录之间的延迟(秒),避免API限流
'source_table': 'processed_rss_data', # 源数据表
'result_table': 'ai_processor_rss_analysis', # AI分析结果表
}
}
# 钉钉Webhook配置
# 优先从环境变量读取,如果没有则使用下面的默认值(需要用户自行配置)
# 请将下面的空字符串替换为你的钉钉Webhook地址,格式:https://oapi.dingtalk.com/robot/send?access_token=xxx
DINGTALK_WEBHOOK = os.getenv('DINGTALK_WEBHOOK', '') # 钉钉机器人Webhook地址
# 例如:DINGTALK_WEBHOOK = os.getenv('DINGTALK_WEBHOOK', 'https://oapi.dingtalk.com/robot/send?access_token=your_token_here')
+135
View File
@@ -0,0 +1,135 @@
@echo off
REM Intelligence data processing system - simplified launcher
REM Purpose: detect the Python environment, then start the system
REM Author: AI Assistant
REM Version: 1.0
REM Date: 2025-10-29
REM
REM Note on error checks: inside a parenthesised block the percent form of
REM errorlevel is expanded once, when the whole block is parsed, so it would
REM report the value from before the block started. Checks that follow a
REM command inside a block therefore use the delayed form with exclamation
REM marks, which is available because delayed expansion is enabled below.
REM Note on conda: conda is a batch file on Windows, so it has to be started
REM with CALL, otherwise control never returns to this script.
chcp 65001 >nul
setlocal enabledelayedexpansion
REM Set up ANSI colors
for /f %%a in ('echo prompt $E ^| cmd') do set "ESC=%%a"
set "GREEN=%ESC%[32m"
set "RED=%ESC%[31m"
set "YELLOW=%ESC%[33m"
set "CYAN=%ESC%[36m"
set "RESET=%ESC%[0m"
REM Configuration variables
set "CONDA_ENV_NAME=intelligence_env"
set "PROJECT_PATH=%~dp0.."
set "PYTHON_VERSION=3.13"
echo %CYAN%===============================================%RESET%
echo %CYAN% 情报数据处理系统启动器%RESET%
echo %CYAN%===============================================%RESET%
echo.
REM Check the project path
if not exist "%PROJECT_PATH%\main.py" (
    echo %RED%错误: 项目路径不存在或main.py文件未找到%RESET%
    echo %YELLOW%当前路径: %PROJECT_PATH%%RESET%
    pause
    exit /b 1
)
echo %GREEN%✓ 项目路径检查通过%RESET%
REM Check whether Python is installed
echo %CYAN%检查Python环境...%RESET%
python --version >nul 2>&1
if %errorLevel% neq 0 (
    echo %RED%Python未安装或未添加到PATH%RESET%
    echo %YELLOW%正在尝试检测Anaconda...%RESET%
    REM Check for Anaconda
    where conda >nul 2>&1
    if !errorlevel! neq 0 (
        echo %RED%Anaconda未安装%RESET%
        echo %YELLOW%请安装Python或Anaconda后重试%RESET%
        echo %CYAN%下载地址: https://www.python.org/downloads/%RESET%
        echo %CYAN%或: https://www.anaconda.com/products/distribution%RESET%
        pause
        exit /b 1
    ) else (
        echo %GREEN%✓ 检测到Anaconda%RESET%
        call conda --version
    )
) else (
    echo %GREEN%✓ Python已安装%RESET%
    python --version
)
REM Check the Conda environment
echo %CYAN%检查Conda环境: %CONDA_ENV_NAME%%RESET%
conda env list | findstr /i "%CONDA_ENV_NAME%" >nul 2>&1
if %errorLevel% neq 0 (
    echo %YELLOW%环境不存在,正在创建...%RESET%
    call conda create -n %CONDA_ENV_NAME% python=%PYTHON_VERSION% -y
    if !errorlevel! neq 0 (
        echo %RED%环境创建失败%RESET%
        pause
        exit /b 1
    )
    echo %GREEN%✓ 环境创建成功%RESET%
) else (
    echo %GREEN%✓ 环境已存在%RESET%
)
REM Activate the environment
echo %CYAN%激活Conda环境...%RESET%
call conda activate %CONDA_ENV_NAME%
if %errorLevel% neq 0 (
    echo %RED%环境激活失败%RESET%
    pause
    exit /b 1
)
echo %GREEN%✓ 环境激活成功%RESET%
REM Check dependencies
echo %CYAN%检查Python依赖...%RESET%
if exist "%PROJECT_PATH%\requirements.txt" (
    echo %YELLOW%安装/更新依赖包...%RESET%
    pip install -r "%PROJECT_PATH%\requirements.txt" --quiet
    if !errorlevel! neq 0 (
        echo %YELLOW%依赖安装失败,尝试继续运行...%RESET%
    ) else (
        echo %GREEN%✓ 依赖安装完成%RESET%
    )
) else (
    echo %YELLOW%未找到requirements.txt,跳过依赖安装%RESET%
)
REM Switch to the project directory
echo %CYAN%切换到项目目录: %PROJECT_PATH%%RESET%
cd /d "%PROJECT_PATH%"
REM Check the configuration file
if not exist "config.py" (
    echo %YELLOW%警告: 未找到config.py配置文件%RESET%
    echo %CYAN%将使用默认配置运行%RESET%
)
REM Show startup information
echo.
echo %GREEN%===============================================%RESET%
echo %GREEN% 启动情报数据处理系统%RESET%
echo %GREEN%===============================================%RESET%
echo.
echo %CYAN%环境信息:%RESET%
echo Conda环境: %CONDA_ENV_NAME%
echo 项目路径: %PROJECT_PATH%
echo Python版本:
python --version
echo.
echo %YELLOW%按 Ctrl+C 停止系统%RESET%
echo.
REM Start the system
echo %CYAN%启动情报数据处理系统主程序...%RESET%
python main.py
echo.
echo %CYAN%情报数据处理系统已停止%RESET%
pause
+109
View File
@@ -133927,3 +133927,112 @@
→ module: 'RSSDataAIProcessor'
2025-10-29 10:30:40.130 | DEBUG | ai_processor_rss_data:112 - 处理记录 182 (2/3)
→ module: 'RSSDataAIProcessor'
2025-10-29 17:34:56.620 | INFO | base_reporter:329 - 添加数据源: AI分析结果
→ module: 'BaseReporter'
2025-10-29 17:34:56.621 | INFO | daily:42 - 开始生成日报
→ module: 'DailyReporter'
2025-10-29 17:34:56.621 | INFO | daily:48 - 时间范围: 2025-10-28 17:34:56 至 2025-10-29 17:34:56
→ module: 'DailyReporter'
2025-10-29 17:34:56.621 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: '\n SELECT \n `文章标题` as title,\n `文章链接` as link,\n `文章摘要` as summary,\n `发布时间` as publish_time,\n `来源URL` as source_url,\n `分类` as category,\n `标签` as tags,\n `相关度评分` as relevance_score,\n `分析说明` as analysis_note,\n `处理时间` as process_time\n FROM `ai_processor_rss_analysis`\n ...
2025-10-29 17:34:57.326 | INFO | base_reporter:130 - 时间范围 2025-10-28 17:34:56.621364 到 2025-10-29 17:34:56.621364 内没有相关数据(是否相关=1)
→ module: 'AIAnalysisDataSource'
2025-10-29 17:34:57.329 | ERROR | daily:83 - 生成日报失败: No module named 'markdown'
→ exc_info: True
2025-10-29 17:35:46.546 | INFO | base_reporter:329 - 添加数据源: AI分析结果
→ module: 'BaseReporter'
2025-10-29 17:35:46.547 | INFO | daily:42 - 开始生成日报
→ module: 'DailyReporter'
2025-10-29 17:35:46.547 | INFO | daily:48 - 时间范围: 2025-10-28 17:35:46 至 2025-10-29 17:35:46
→ module: 'DailyReporter'
2025-10-29 17:35:46.547 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: '\n SELECT \n `文章标题` as title,\n `文章链接` as link,\n `文章摘要` as summary,\n `发布时间` as publish_time,\n `来源URL` as source_url,\n `分类` as category,\n `标签` as tags,\n `相关度评分` as relevance_score,\n `分析说明` as analysis_note,\n `处理时间` as process_time\n FROM `ai_processor_rss_analysis`\n ...
2025-10-29 17:35:47.054 | INFO | base_reporter:130 - 时间范围 2025-10-28 17:35:46.547444 到 2025-10-29 17:35:46.547444 内没有相关数据(是否相关=1)
→ module: 'AIAnalysisDataSource'
2025-10-29 17:35:47.358 | INFO | base_reporter:449 - 报告已保存到: output/reports/daily\daily_report_20251029_173546.html
→ module: 'DailyReporter'
2025-10-29 17:35:47.360 | INFO | daily:72 - 日报生成完成: output/reports/daily\daily_report_20251029_173546.html
→ module: 'DailyReporter'
2025-10-29 17:36:37.665 | INFO | base_reporter:329 - 添加数据源: AI分析结果
→ module: 'BaseReporter'
2025-10-29 17:36:37.666 | INFO | weekly:42 - 开始生成周报
→ module: 'WeeklyReporter'
2025-10-29 17:36:37.666 | INFO | weekly:48 - 时间范围: 2025-10-22 17:36:37 至 2025-10-29 17:36:37
→ module: 'WeeklyReporter'
2025-10-29 17:36:37.667 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: '\n SELECT \n `文章标题` as title,\n `文章链接` as link,\n `文章摘要` as summary,\n `发布时间` as publish_time,\n `来源URL` as source_url,\n `分类` as category,\n `标签` as tags,\n `相关度评分` as relevance_score,\n `分析说明` as analysis_note,\n `处理时间` as process_time\n FROM `ai_processor_rss_analysis`\n ...
2025-10-29 17:36:38.112 | INFO | base_reporter:135 - 获取到 1 条相关数据(是否相关=1)
→ module: 'AIAnalysisDataSource'
2025-10-29 17:36:38.234 | INFO | base_reporter:449 - 报告已保存到: output/reports/weekly\weekly_report_20251029_173637.html
→ module: 'WeeklyReporter'
2025-10-29 17:36:38.235 | INFO | weekly:72 - 周报生成完成: output/reports/weekly\weekly_report_20251029_173637.html
→ module: 'WeeklyReporter'
2025-10-30 13:47:56.057 | INFO | task_scheduler:27 - 任务调度器已初始化,最大工作线程数: 5
→ module: 'TaskScheduler'
2025-10-30 13:48:20.079 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
2025-10-30 13:48:20.346 | INFO | mysql_agent:134 - 查询执行成功
→ module: 'MySQLAgent(Windows)'
→ 行数: 1
2025-10-30 13:48:20.349 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
2025-10-30 13:56:55.700 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
2025-10-30 13:56:55.939 | INFO | mysql_agent:134 - 查询执行成功
→ module: 'MySQLAgent(Windows)'
→ 行数: 1
2025-10-30 13:56:55.941 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
2025-10-30 13:57:07.496 | INFO | task_scheduler:27 - 任务调度器已初始化,最大工作线程数: 5
→ module: 'TaskScheduler'
2025-10-30 13:57:10.827 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
2025-10-30 13:57:11.121 | INFO | mysql_agent:134 - 查询执行成功
→ module: 'MySQLAgent(Windows)'
→ 行数: 1
2025-10-30 13:57:11.125 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
2025-10-30 13:57:49.005 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
2025-10-30 13:57:49.286 | INFO | mysql_agent:134 - 查询执行成功
→ module: 'MySQLAgent(Windows)'
→ 行数: 1
2025-10-30 13:57:49.737 | INFO | processor_rss_data:65 - RSS数据处理器初始化完成
→ module: 'RSSDataProcessor'
2025-10-30 13:57:49.738 | INFO | processor_rss_data:335 - 开始处理RSS数据...
→ module: 'RSSDataProcessor'
2025-10-30 13:57:49.740 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: '\n SELECT id, 文章标题, 文章摘要, 发布时间, 来源URL, 文章链接\n FROM collector_rss_subscriptions\n WHERE 是否已处理 = 0\n ORDER BY 发布时间 DESC\n LIMIT %s\n '
2025-10-30 13:57:50.013 | INFO | processor_rss_data:107 - 成功加载 6 条未处理的RSS数据
→ module: 'RSSDataProcessor'
2025-10-30 13:57:50.014 | INFO | processor_rss_data:146 - 成功加载停用词表,共 98 个词
→ module: 'RSSDataProcessor'
2025-10-30 13:57:50.015 | INFO | processor_rss_data:82 - 成功加载汽车后市场关键词,共 37 个
→ module: 'RSSDataProcessor'
2025-10-30 13:57:50.899 | INFO | processor_rss_data:235 - 数据处理完成,共处理 6 条记录
→ module: 'RSSDataProcessor'
2025-10-30 13:57:50.905 | INFO | processor_rss_data:246 - 过滤出 0 条汽车后市场相关新闻
→ module: 'RSSDataProcessor'
2025-10-30 13:57:51.015 | DEBUG | mysql_agent:614 - 更新执行完成
→ module: 'MySQLAgent(Windows)'
→ 受影响行数: 6
2025-10-30 13:57:51.015 | INFO | processor_rss_data:129 - 成功标记 6 条数据为已处理
→ module: 'RSSDataProcessor'
2025-10-30 13:57:51.016 | INFO | processor_rss_data:372 - RSS数据处理完成
→ module: 'RSSDataProcessor'
→ total_articles: 6
→ filtered_articles: 0
→ filter_rate: 0.0
→ processing_time: '2025-10-30 13:57:50'
→ mark_success: True
2025-10-30 13:57:53.702 | DEBUG | mysql_agent:614 - 更新执行完成
→ module: 'MySQLAgent(Windows)'
→ 受影响行数: 1
+744
View File
@@ -71071,3 +71071,747 @@ Traceback (most recent call last):
└ 'RSSDataProcessor'
AttributeError: 类 RSSDataProcessor 中未找到方法 main
2025-10-29 17:34:57.329 | ERROR | daily:83 - 生成日报失败: No module named 'markdown'
→ exc_info: True
2025-10-30 13:48:20.349 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x00000241EF7C0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x00000241EF7C1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x00000241EDB7B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x00000241EDB7D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x00000241ED6E9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x00000241EF781D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'be29372e-391'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'ddbb9c4d44b6'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x00000241F0E71E40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 48, 20, 70000, tzinfo=tzutc()), 'msg_id': '7653bb92-3d92-4584-b5fd-f30...
│ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x00000241EF7AB140>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x00000241EF6F5B40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x00000241EF7ADDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x00000241EEDFA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x00000241F0E1B5E0>
└ <function _pseudo_sync_runner at 0x00000241EEDD5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x00000241F0E1B5E0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x00000241F101DA90>, <ast.FunctionDef object at 0x00000241F101D890>, <ast.Expr object at 0x000002...
│ │ └ <ast.Module object at 0x00000241F101DA50>
│ └ <function InteractiveShell.run_ast_nodes at 0x00000241EEDFA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 24191c224e0, execution_count=2 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x00000241EEDFA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x00000241EEDF4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
│ │ └ <property object at 0x00000241EEDF4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
└ <code object <module> at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x0000024191C38C20>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x0000024191C38EA0>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x0000024191C3F920>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x00000241FF4E9700>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x00000241EF7C0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x00000241EF7C1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x00000241EDB7B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x00000241EDB7D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x00000241ED6E9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x00000241EF781D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'be29372e-391'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'ddbb9c4d44b6'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x00000241F0E71E40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 48, 20, 70000, tzinfo=tzutc()), 'msg_id': '7653bb92-3d92-4584-b5fd-f30...
│ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x00000241EF7AB140>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x00000241EF6F5B40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x00000241EF7ADDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x00000241EEDFA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x00000241F0E1B5E0>
└ <function _pseudo_sync_runner at 0x00000241EEDD5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x00000241F0E1B5E0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x00000241F101DA90>, <ast.FunctionDef object at 0x00000241F101D890>, <ast.Expr object at 0x000002...
│ │ └ <ast.Module object at 0x00000241F101DA50>
│ └ <function InteractiveShell.run_ast_nodes at 0x00000241EEDFA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 24191c224e0, execution_count=2 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x00000241EEDFA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x00000241EEDF4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
│ │ └ <property object at 0x00000241EEDF4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
└ <code object <module> at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x0000024191C38C20>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x0000024191C38EA0>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x0000024191C3F920>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x00000241FF4E9700>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
2025-10-30 13:56:55.941 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x00000241EF7C0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x00000241EF7C1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x00000241EDB7B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x00000241EDB7D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x00000241ED6E9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x00000241EF781D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'be29372e-391'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'36f03038e4ef'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x0000024191C6BC40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 56, 55, 692000, tzinfo=tzutc()), 'msg_id': '788ee0fd-a13f-4e53-98b0-e9...
│ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x00000241EF7AB140>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x00000241F0DB22C0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x00000241EF7ADDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x00000241EEDFA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x0000024191A23AC0>
└ <function _pseudo_sync_runner at 0x00000241EEDD5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x0000024191A23AC0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x0000024191E00090>, <ast.FunctionDef object at 0x00000241F1027390>, <ast.Expr object at 0x000002...
│ │ └ <ast.Module object at 0x0000024191E00050>
│ └ <function InteractiveShell.run_ast_nodes at 0x00000241EEDFA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 24191c22900, execution_count=3 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x00000241EEDFA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x00000241EEDF4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
│ │ └ <property object at 0x00000241EEDF4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
└ <code object <module> at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x0000024191C38CC0>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x0000024191C38EA0>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x0000024191C3F920>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x00000241FF4E9700>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x00000241EF7C0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x00000241EF7C1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x00000241EDB7B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x00000241EDB7D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x00000241ED6E9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x00000241EF781D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'be29372e-391'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'36f03038e4ef'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x0000024191C6BC40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 56, 55, 692000, tzinfo=tzutc()), 'msg_id': '788ee0fd-a13f-4e53-98b0-e9...
│ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x00000241EF7AB140>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x00000241F0DB22C0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x00000241EF7ADDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x00000241EEDFA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x0000024191A23AC0>
└ <function _pseudo_sync_runner at 0x00000241EEDD5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x0000024191A23AC0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x0000024191E00090>, <ast.FunctionDef object at 0x00000241F1027390>, <ast.Expr object at 0x000002...
│ │ └ <ast.Module object at 0x0000024191E00050>
│ └ <function InteractiveShell.run_ast_nodes at 0x00000241EEDFA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 24191c22900, execution_count=3 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x00000241EEDFA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x00000241EEDF4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
│ │ └ <property object at 0x00000241EEDF4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
└ <code object <module> at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x0000024191C38CC0>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x0000024191C38EA0>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x0000024191C3F920>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x00000241FF4E9700>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
2025-10-30 13:57:11.125 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x000001E4F28A0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x000001E4EE687D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x000001E4F28A1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x000001E4F288ADE0>
└ <ipykernel.kernelapp.IPKernelApp object at 0x000001E4EE687D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x000001E4F0D2B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x000001E4F288ADE0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x000001E4F0D2D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x000001E4F08B9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x000001E4F2861D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x000001E4F288A6C0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'4c713768-9d1'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'1b55c49a4a65'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x000001E4F288A6C0>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x000001E4F3F51E40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 57, 10, 815000, tzinfo=tzutc()), 'msg_id': '132b59a7-4a02-4a8d-a25c-e2...
│ └ [b'4c713768-9d1a-49ca-83ed-6814787009a5']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x000001E4F288B200>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x000001E4F27D5B40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x000001E4F288DDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x000001E4F1EEA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x000001E4F3EFADC0>
└ <function _pseudo_sync_runner at 0x000001E4F1ED5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x000001E4F3EFADC0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_19964\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x000001E4F412F450>, <ast.FunctionDef object at 0x000001E4F410FED0>, <ast.Expr object at 0x000001...
│ │ └ <ast.Module object at 0x000001E4F412C190>
│ └ <function InteractiveShell.run_ast_nodes at 0x000001E4F1EEA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 1e4ffcf6240, execution_count=2 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x000001E4F1EEA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x000001E4F1EE4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
│ │ └ <property object at 0x000001E4F1EE4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
└ <code object <module> at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x000001E4FFD10CC0>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x000001E4FFD10EA0>
└ <__main__.TaskManager object at 0x000001E4FEBA31D0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x000001E4FFD0F9C0>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x000001E4FE552A20>
└ <__main__.TaskManager object at 0x000001E4FEBA31D0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x000001E4F28A0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x000001E4EE687D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x000001E4F28A1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x000001E4F288ADE0>
└ <ipykernel.kernelapp.IPKernelApp object at 0x000001E4EE687D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x000001E4F0D2B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x000001E4F288ADE0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x000001E4F0D2D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x000001E4F08B9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x000001E4F2861D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x000001E4F288A6C0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'4c713768-9d1'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'1b55c49a4a65'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x000001E4F288A6C0>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x000001E4F3F51E40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 57, 10, 815000, tzinfo=tzutc()), 'msg_id': '132b59a7-4a02-4a8d-a25c-e2...
│ └ [b'4c713768-9d1a-49ca-83ed-6814787009a5']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x000001E4F288B200>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x000001E4F27D5B40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x000001E4F288DDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x000001E4F1EEA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x000001E4F3EFADC0>
└ <function _pseudo_sync_runner at 0x000001E4F1ED5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x000001E4F3EFADC0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_19964\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x000001E4F412F450>, <ast.FunctionDef object at 0x000001E4F410FED0>, <ast.Expr object at 0x000001...
│ │ └ <ast.Module object at 0x000001E4F412C190>
│ └ <function InteractiveShell.run_ast_nodes at 0x000001E4F1EEA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 1e4ffcf6240, execution_count=2 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x000001E4F1EEA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x000001E4F1EE4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
│ │ └ <property object at 0x000001E4F1EE4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
└ <code object <module> at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x000001E4FFD10CC0>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x000001E4FFD10EA0>
└ <__main__.TaskManager object at 0x000001E4FEBA31D0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x000001E4FFD0F9C0>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x000001E4FE552A20>
└ <__main__.TaskManager object at 0x000001E4FEBA31D0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
@@ -0,0 +1,301 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>汽车后市场情报报告</title>
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@300;400;500;700&display=swap" rel="stylesheet">
<style>
:root {
--primary: #3498db;
--secondary: #2ecc71;
--accent: #e74c3c;
--dark: #2c3e50;
--light: #f8f9fa;
--border: #e0e0e0;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Noto Sans SC', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.8;
color: #333;
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
padding: 20px;
}
.report-container {
max-width: 1200px;
margin: 0 auto;
padding: 40px;
background: white;
box-shadow: 0 10px 40px rgba(0,0,0,0.1);
border-radius: 12px;
}
.report-header {
text-align: center;
padding-bottom: 30px;
border-bottom: 3px solid var(--primary);
margin-bottom: 40px;
}
.report-header h1 {
color: var(--dark);
font-size: 2.5em;
margin-bottom: 10px;
}
.report-header .report-date {
color: #666;
font-size: 1.1em;
}
h1 {
color: var(--dark);
font-size: 2em;
margin: 30px 0 20px 0;
padding-bottom: 10px;
border-bottom: 2px solid var(--primary);
}
h2 {
color: var(--dark);
font-size: 1.6em;
margin: 25px 0 15px 0;
padding-left: 10px;
border-left: 4px solid var(--primary);
}
h3 {
color: var(--dark);
font-size: 1.3em;
margin: 20px 0 10px 0;
}
h4 {
color: #555;
font-size: 1.1em;
margin: 15px 0 8px 0;
}
p {
margin: 12px 0;
text-align: justify;
}
ul, ol {
margin: 15px 0;
padding-left: 30px;
}
li {
margin: 8px 0;
}
/* 表格样式 */
table {
width: 100%;
border-collapse: collapse;
margin: 25px 0;
box-shadow: 0 2px 15px rgba(0,0,0,0.1);
border-radius: 8px;
overflow: hidden;
}
table thead {
background: linear-gradient(135deg, var(--primary) 0%, #2980b9 100%);
color: white;
}
table th {
padding: 15px;
text-align: left;
font-weight: 600;
}
table td {
padding: 12px 15px;
border-bottom: 1px solid var(--border);
}
table tbody tr:hover {
background-color: #f5f5f5;
}
table tbody tr:last-child td {
border-bottom: none;
}
/* 代码块样式 */
pre {
background: #f4f4f4;
border: 1px solid var(--border);
border-radius: 6px;
padding: 15px;
overflow-x: auto;
margin: 20px 0;
}
code {
background: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}
pre code {
background: none;
padding: 0;
}
/* 链接样式 */
a {
color: var(--primary);
text-decoration: none;
border-bottom: 1px dotted var(--primary);
transition: all 0.3s;
}
a:hover {
color: var(--accent);
border-bottom-color: var(--accent);
}
/* 新闻列表样式 */
.news-item {
background: #f9f9f9;
border-left: 4px solid var(--secondary);
padding: 15px 20px;
margin: 15px 0;
border-radius: 6px;
transition: all 0.3s;
}
.news-item:hover {
background: #f0f0f0;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
.news-item h3 {
margin-top: 0;
color: var(--dark);
}
.news-item .news-meta {
color: #666;
font-size: 0.9em;
margin-top: 10px;
}
.news-item .news-category {
display: inline-block;
background: var(--secondary);
color: white;
padding: 3px 10px;
border-radius: 12px;
font-size: 0.85em;
margin-right: 10px;
}
/* 统计信息样式 */
.stats-box {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 25px;
border-radius: 10px;
margin: 25px 0;
}
.stats-box h2 {
color: white;
border: none;
padding: 0;
margin: 0 0 15px 0;
}
.stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-top: 20px;
}
.stat-item {
text-align: center;
}
.stat-number {
font-size: 2.5em;
font-weight: bold;
margin-bottom: 5px;
}
.stat-label {
font-size: 0.9em;
opacity: 0.9;
}
/* 响应式设计 */
@media (max-width: 768px) {
.report-container {
padding: 20px;
}
.report-header h1 {
font-size: 1.8em;
}
h1 {
font-size: 1.6em;
}
h2 {
font-size: 1.3em;
}
table {
font-size: 0.9em;
}
table th,
table td {
padding: 8px;
}
}
/* 打印样式 */
@media print {
body {
background: white;
padding: 0;
}
.report-container {
box-shadow: none;
padding: 0;
}
}
</style>
</head>
<body>
<div class="report-container">
<h1>汽车后市场情报日报</h1>
<h2>报告时间</h2>
<p><strong>生成时间</strong>: 2025-10-29 17:35:46<br>
<strong>时间范围</strong>: 2025-10-28 17:35:46 至 2025-10-29 17:35:46</p>
<h2>数据统计</h2>
<ul>
<li><strong>相关文章数</strong>: 0</li>
</ul>
<h2>相关新闻</h2>
<p>昨日无汽车后市场相关的新闻</p>
</div>
</body>
</html>
@@ -0,0 +1,311 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>汽车后市场情报报告</title>
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@300;400;500;700&display=swap" rel="stylesheet">
<style>
:root {
--primary: #3498db;
--secondary: #2ecc71;
--accent: #e74c3c;
--dark: #2c3e50;
--light: #f8f9fa;
--border: #e0e0e0;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Noto Sans SC', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.8;
color: #333;
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
padding: 20px;
}
.report-container {
max-width: 1200px;
margin: 0 auto;
padding: 40px;
background: white;
box-shadow: 0 10px 40px rgba(0,0,0,0.1);
border-radius: 12px;
}
.report-header {
text-align: center;
padding-bottom: 30px;
border-bottom: 3px solid var(--primary);
margin-bottom: 40px;
}
.report-header h1 {
color: var(--dark);
font-size: 2.5em;
margin-bottom: 10px;
}
.report-header .report-date {
color: #666;
font-size: 1.1em;
}
h1 {
color: var(--dark);
font-size: 2em;
margin: 30px 0 20px 0;
padding-bottom: 10px;
border-bottom: 2px solid var(--primary);
}
h2 {
color: var(--dark);
font-size: 1.6em;
margin: 25px 0 15px 0;
padding-left: 10px;
border-left: 4px solid var(--primary);
}
h3 {
color: var(--dark);
font-size: 1.3em;
margin: 20px 0 10px 0;
}
h4 {
color: #555;
font-size: 1.1em;
margin: 15px 0 8px 0;
}
p {
margin: 12px 0;
text-align: justify;
}
ul, ol {
margin: 15px 0;
padding-left: 30px;
}
li {
margin: 8px 0;
}
/* 表格样式 */
table {
width: 100%;
border-collapse: collapse;
margin: 25px 0;
box-shadow: 0 2px 15px rgba(0,0,0,0.1);
border-radius: 8px;
overflow: hidden;
}
table thead {
background: linear-gradient(135deg, var(--primary) 0%, #2980b9 100%);
color: white;
}
table th {
padding: 15px;
text-align: left;
font-weight: 600;
}
table td {
padding: 12px 15px;
border-bottom: 1px solid var(--border);
}
table tbody tr:hover {
background-color: #f5f5f5;
}
table tbody tr:last-child td {
border-bottom: none;
}
/* 代码块样式 */
pre {
background: #f4f4f4;
border: 1px solid var(--border);
border-radius: 6px;
padding: 15px;
overflow-x: auto;
margin: 20px 0;
}
code {
background: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}
pre code {
background: none;
padding: 0;
}
/* 链接样式 */
a {
color: var(--primary);
text-decoration: none;
border-bottom: 1px dotted var(--primary);
transition: all 0.3s;
}
a:hover {
color: var(--accent);
border-bottom-color: var(--accent);
}
/* 新闻列表样式 */
.news-item {
background: #f9f9f9;
border-left: 4px solid var(--secondary);
padding: 15px 20px;
margin: 15px 0;
border-radius: 6px;
transition: all 0.3s;
}
.news-item:hover {
background: #f0f0f0;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
.news-item h3 {
margin-top: 0;
color: var(--dark);
}
.news-item .news-meta {
color: #666;
font-size: 0.9em;
margin-top: 10px;
}
.news-item .news-category {
display: inline-block;
background: var(--secondary);
color: white;
padding: 3px 10px;
border-radius: 12px;
font-size: 0.85em;
margin-right: 10px;
}
/* 统计信息样式 */
.stats-box {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 25px;
border-radius: 10px;
margin: 25px 0;
}
.stats-box h2 {
color: white;
border: none;
padding: 0;
margin: 0 0 15px 0;
}
.stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-top: 20px;
}
.stat-item {
text-align: center;
}
.stat-number {
font-size: 2.5em;
font-weight: bold;
margin-bottom: 5px;
}
.stat-label {
font-size: 0.9em;
opacity: 0.9;
}
/* 响应式设计 */
@media (max-width: 768px) {
.report-container {
padding: 20px;
}
.report-header h1 {
font-size: 1.8em;
}
h1 {
font-size: 1.6em;
}
h2 {
font-size: 1.3em;
}
table {
font-size: 0.9em;
}
table th,
table td {
padding: 8px;
}
}
/* 打印样式 */
@media print {
body {
background: white;
padding: 0;
}
.report-container {
box-shadow: none;
padding: 0;
}
}
</style>
</head>
<body>
<div class="report-container">
<h1>汽车后市场情报周报</h1>
<h2>报告时间</h2>
<p><strong>生成时间</strong>: 2025-10-29 17:36:37<br>
<strong>时间范围</strong>: 2025-10-22 17:36:37 至 2025-10-29 17:36:37</p>
<h2>数据统计</h2>
<ul>
<li><strong>相关文章数</strong>: 1</li>
</ul>
<h2>汽车后市场相关新闻</h2>
<p>共找到 1 篇相关新闻:</p>
<h3>1. 2025年全国汽车以旧换新补贴申请量突破1000万份</h3>
<ul class="news-list">
<li><strong>分类</strong>: 二手车</li>
<li><strong>标签</strong>: ["二手车", "政策补贴"]</li>
<li><strong>摘要</strong>: 记者从商务部了解到,截至10月22日,2025年汽车以旧换新补贴申请量突破1000万份,其中汽车报废更新超340万份,置换更新超660万份。</li>
<li><strong>链接</strong>: <a href="http://www.chinanews.com/cj/2025/10-23/10503300.shtml" rel="noopener noreferrer" target="_blank">http://www.chinanews.com/cj/2025/10-23/10503300.shtml</a></li>
<li><strong>发布时间</strong>: 2025-10-23 08:35:31</li>
<li><strong>相关度评分</strong>: 70</li>
<li><strong>分析说明</strong>: 新闻涉及汽车以旧换新补贴申请量,其中包含置换更新超660万份,直接关联二手车流通环节,属于汽车后市场中二手车领域的政策动态。</li>
</ul>
</div>
</body>
</html>
@@ -195,7 +195,7 @@ class RSSDataAIProcessor:
raise
def create_ai_result_table(self):
"""创建AI处理结果表"""
"""创建AI处理结果表(使用安全方法,确保不会删除现有数据)"""
create_sql = f"""
CREATE TABLE IF NOT EXISTS {self.ai_table} (
id INT AUTO_INCREMENT PRIMARY KEY COMMENT '主键ID',
@@ -221,10 +221,13 @@ class RSSDataAIProcessor:
"""
try:
self.db_agent.execute_sql(create_sql)
self.log.info(f"成功创建AI结果表: {self.ai_table}")
# 使用安全方法创建表(如果不存在),确保不会删除现有数据
self.db_agent.create_table_if_not_exists(
table_name=self.ai_table,
create_sql=create_sql
)
except Exception as e:
self.log.error(f"创建AI结果表失败: {str(e)}", exc_info=True)
self.log.error(f"创建AI结果表失败(可能是数据库连接问题): {str(e)}", exc_info=True)
raise
def load_unprocessed_data(self, limit: int = 100) -> pd.DataFrame:
+52 -30
View File
@@ -270,14 +270,34 @@ class RSSDataProcessor:
save_df = save_df.drop('segmented_words', axis=1)
# 检查目标表是否存在,不存在则创建
if not self.db_agent.table_exists(self.processed_table_name):
self.create_processed_table()
# 注意:如果连接失败,table_exists可能返回False,需要捕获异常
try:
table_exists = self.db_agent.table_exists(self.processed_table_name)
if not table_exists:
self.log.warning(f"{self.processed_table_name} 不存在,正在创建...")
self.create_processed_table()
else:
# 表存在时,也确保有唯一索引(安全操作,不会删除数据)
self.create_processed_table() # 这个方法会检查并添加索引,不会删除数据
except Exception as table_check_error:
# 如果检查表存在性时连接失败,记录错误但不中断
# 因为后续的插入操作会再次尝试连接
self.log.warning(f"检查表存在性时出错(可能是连接问题): {str(table_check_error)}")
# 尝试创建表(如果表已存在,CREATE TABLE IF NOT EXISTS不会报错)
try:
self.create_processed_table()
except Exception as create_error:
# 如果创建表也失败(可能是连接问题),记录错误
self.log.error(f"创建表时出错(可能是连接问题): {str(create_error)}")
# 继续尝试插入,如果表存在,插入会成功;如果表不存在,插入会失败并抛出异常
# 插入数据
# 插入数据ignore_duplicates=True 会跳过重复的文章链接)
# 注意:INSERT INTO + ignore_duplicates 只会跳过重复记录,不会覆盖或删除现有数据
# 如果数据库连接失败,此操作会抛出异常,不会部分成功
inserted_rows = self.db_agent.insert_from_df(
table_name=self.processed_table_name,
df=save_df,
ignore_duplicates=True
ignore_duplicates=True # 跳过重复的文章链接,不会删除或覆盖现有数据
)
self.log.info(f"成功保存 {inserted_rows} 条处理结果到数据库")
@@ -288,7 +308,10 @@ class RSSDataProcessor:
return False
def create_processed_table(self):
"""创建处理结果表"""
"""
创建处理结果表(带唯一索引保护,防止重复插入)
使用 MySQLAgent 的安全方法,确保不会删除现有数据
"""
create_sql = f"""
CREATE TABLE IF NOT EXISTS {self.processed_table_name} (
id INT AUTO_INCREMENT PRIMARY KEY,
@@ -306,10 +329,27 @@ class RSSDataProcessor:
"""
try:
self.db_agent.execute_sql(create_sql)
self.log.info(f"成功创建处理结果表: {self.processed_table_name}")
# 使用安全方法创建表(如果不存在)
self.db_agent.create_table_if_not_exists(
table_name=self.processed_table_name,
create_sql=create_sql
)
# 使用安全方法添加唯一索引(如果不存在)
# 注意:唯一索引在创建表时不能直接包含,因为如果表已存在会报错
# 所以先创建表,再单独添加索引
self.db_agent.add_unique_index_if_not_exists(
table_name=self.processed_table_name,
index_name='uk_article_link',
column_name='文章链接',
column_length=500,
check_duplicates=True
)
except Exception as e:
self.log.error(f"创建表失败: {str(e)}", exc_info=True)
# 如果创建表或添加索引失败(可能是连接问题),抛出异常
# 这样上层调用可以知道操作失败,不会误以为成功
self.log.error(f"创建/检查表失败(可能是数据库连接问题): {str(e)}", exc_info=True)
raise
def get_processing_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
@@ -383,27 +423,9 @@ class RSSDataProcessor:
return {'success': False, 'message': f'处理失败: {str(e)}'}
def main():
"""主函数入口"""
try:
# 创建处理器实例
processor = RSSDataProcessor()
# 处理RSS数据
result = processor.process_rss_data(
limit=5000, # 处理最近5000条数据
save_to_db=True # 保存到数据库
)
if result['success']:
print("RSS数据处理完成!")
print(f"处理统计: {result['statistics']}")
else:
print(f"处理失败: {result['message']}")
except Exception as e:
print(f"程序运行出错: {str(e)}")
def main(self, limit: int = 1000, save_to_db: bool = True) -> Dict[str, Any]:
"""主函数入口(实例方法),对外统一调用"""
return self.process_rss_data(limit=limit, save_to_db=save_to_db)
if __name__ == "__main__":
main()
RSSDataProcessor().main(limit=5000, save_to_db=True)
@@ -1,6 +1,5 @@
import importlib
import threading
import time
from datetime import datetime
from typing import Dict, List, Optional, Any
import croniter
+67
View File
@@ -0,0 +1,67 @@
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "## 获取钉钉token",
"id": "4a7d18176711daad"
},
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-10-30T02:59:09.458462Z",
"start_time": "2025-10-30T02:59:09.015765Z"
}
},
"source": [
"from utils.Ding_api import DingAPI\n",
"\n",
"api_instance = DingAPI()\n",
"token = api_instance.get_token()\n",
"print(token)"
],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\requests\\__init__.py:86: RequestsDependencyWarning: Unable to find acceptable character detection dependency (chardet or charset_normalizer).\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2b166a1c8e683ee38f8d2112a7de5e05\n"
]
}
],
"execution_count": 1
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+271 -155
View File
@@ -10,14 +10,25 @@
},
{
"cell_type": "code",
"execution_count": 1,
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-10-29T02:25:08.582541Z",
"start_time": "2025-10-29T02:25:08.473381Z"
}
},
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PROJECT_ROOT = d:\\Idea Project\\intelligence_system\n",
"\u001b[32m2025-10-30 13:57:07\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mtask_scheduler\u001b[0m - \u001b[1m任务调度器已初始化,最大工作线程数: 5\u001b[0m\n"
]
}
],
"source": [
"# 使 Notebook 可从项目根导入\n",
"import sys\n",
@@ -206,18 +217,7 @@
" except Exception:\n",
" pass\n",
" return str(dt)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PROJECT_ROOT = D:\\Idea Project\\intelligence_system\n",
"\u001B[32m2025-10-29 10:25:08\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mtask_scheduler\u001B[0m - \u001B[1m任务调度器已初始化,最大工作线程数: 5\u001B[0m\n"
]
}
],
"execution_count": 8
]
},
{
"cell_type": "markdown",
@@ -242,7 +242,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001B[32m2025-10-29 09:54:09\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n"
"\u001b[32m2025-10-29 09:54:09\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n"
]
},
{
@@ -469,6 +469,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"id": "eab90de72c35429e",
"metadata": {
"ExecuteTime": {
@@ -476,6 +477,62 @@
"start_time": "2025-10-29T02:26:12.648420Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2025-10-29 10:26:12\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n"
]
},
{
"data": {
"text/markdown": [
"### 任务详情\n",
"**任务ID**: 1\n",
"**任务名称**: RSS新闻订阅\n",
"**任务类型**: collector\n",
"**模块路径**: processors.processor_rss_data.RSSDataProcessor\n",
"**Cron表达式**: */5 * * * *\n",
"**时区**: Asia/Shanghai\n",
"**最后运行时间**: 2025-10-28 13:35:09\n",
"**下次运行时间**: 2025-10-29 10:25:00\n",
"**运行状态**: success\n",
"**是否活跃**: 是\n",
"**运行次数**: 496\n",
"**创建时间**: 2025-10-16 15:47:34"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'task_id': 1,\n",
" 'task_name': 'RSS新闻订阅',\n",
" 'task_type': 'collector',\n",
" 'module_path': 'processors.processor_rss_data.RSSDataProcessor',\n",
" 'cron_expression': '*/5 * * * *',\n",
" 'time_zone': 'Asia/Shanghai',\n",
" 'next_run_time': Timestamp('2025-10-29 10:25:00'),\n",
" 'last_run_time': Timestamp('2025-10-28 13:35:09'),\n",
" 'last_run_status': 'success',\n",
" 'run_count': 496,\n",
" 'is_active': 1,\n",
" 'is_running': 0,\n",
" 'created_at': Timestamp('2025-10-16 15:47:34'),\n",
" 'updated_at': Timestamp('2025-10-29 10:24:49')}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 查看指定任务的详情\n",
"def show_task_details(task_id):\n",
@@ -503,53 +560,7 @@
"\n",
"# 执行:查看任务ID为1的详情(替换为实际ID)\n",
"show_task_details(1)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001B[32m2025-10-29 10:26:12\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n"
]
},
{
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "### 任务详情\n**任务ID**: 1\n**任务名称**: RSS新闻订阅\n**任务类型**: collector\n**模块路径**: processors.processor_rss_data.RSSDataProcessor\n**Cron表达式**: */5 * * * *\n**时区**: Asia/Shanghai\n**最后运行时间**: 2025-10-28 13:35:09\n**下次运行时间**: 2025-10-29 10:25:00\n**运行状态**: success\n**是否活跃**: 是\n**运行次数**: 496\n**创建时间**: 2025-10-16 15:47:34"
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"data": {
"text/plain": [
"{'task_id': 1,\n",
" 'task_name': 'RSS新闻订阅',\n",
" 'task_type': 'collector',\n",
" 'module_path': 'processors.processor_rss_data.RSSDataProcessor',\n",
" 'cron_expression': '*/5 * * * *',\n",
" 'time_zone': 'Asia/Shanghai',\n",
" 'next_run_time': Timestamp('2025-10-29 10:25:00'),\n",
" 'last_run_time': Timestamp('2025-10-28 13:35:09'),\n",
" 'last_run_status': 'success',\n",
" 'run_count': 496,\n",
" 'is_active': 1,\n",
" 'is_running': 0,\n",
" 'created_at': Timestamp('2025-10-16 15:47:34'),\n",
" 'updated_at': Timestamp('2025-10-29 10:24:49')}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 10
]
},
{
"cell_type": "markdown",
@@ -569,8 +580,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001B[32m2025-10-29 09:56:52\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n",
"\u001B[32m2025-10-29 09:56:52\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mtask_scheduler\u001B[0m - \u001B[1m新任务添加成功\u001B[0m\n"
"\u001b[32m2025-10-29 09:56:52\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n",
"\u001b[32m2025-10-29 09:56:52\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mtask_scheduler\u001b[0m - \u001b[1m新任务添加成功\u001b[0m\n"
]
},
{
@@ -646,6 +657,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"id": "c892fd8ad2f0dd9d",
"metadata": {
"ExecuteTime": {
@@ -653,6 +665,61 @@
"start_time": "2025-10-29T02:29:55.754298Z"
}
},
"outputs": [
{
"data": {
"text/markdown": [
"### 任务ID 2 更新成功"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2025-10-29 10:29:56\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n"
]
},
{
"data": {
"text/markdown": [
"### 任务详情\n",
"**任务ID**: 2\n",
"**任务名称**: RSS基于规则数据处理\n",
"**任务类型**: processor\n",
"**模块路径**: processors.processor_rss_data\n",
"**Cron表达式**: 0 8,20 * * *\n",
"**时区**: Asia/Shanghai\n",
"**最后运行时间**: 2025-10-28 13:34:49\n",
"**下次运行时间**: 2025-10-28 20:00:00\n",
"**运行状态**: success\n",
"**是否活跃**: 是\n",
"**运行次数**: 10\n",
"**创建时间**: 2025-10-22 16:06:42"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 更新任务属性\n",
"def update_task(task_id, **kwargs):\n",
@@ -685,53 +752,7 @@
"\n",
"# 执行:同时更新多个属性(名称和Cron表达式)\n",
"# update_task(1, name=\"每日早间新闻采集\", cron=\"0 8 * * *\")"
],
"outputs": [
{
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "### 任务ID 2 更新成功"
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001B[32m2025-10-29 10:29:56\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n"
]
},
{
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "### 任务详情\n**任务ID**: 2\n**任务名称**: RSS基于规则数据处理\n**任务类型**: processor\n**模块路径**: processors.processor_rss_data\n**Cron表达式**: 0 8,20 * * *\n**时区**: Asia/Shanghai\n**最后运行时间**: 2025-10-28 13:34:49\n**下次运行时间**: 2025-10-28 20:00:00\n**运行状态**: success\n**是否活跃**: 是\n**运行次数**: 10\n**创建时间**: 2025-10-22 16:06:42"
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 21
]
},
{
"cell_type": "markdown",
@@ -800,15 +821,149 @@
},
{
"cell_type": "code",
"execution_count": 3,
"id": "94892f4134316f8e",
"metadata": {
"jupyter": {
"is_executing": true
},
"ExecuteTime": {
"start_time": "2025-10-29T02:30:10.298891Z"
},
"jupyter": {
"is_executing": true
}
},
"outputs": [
{
"data": {
"text/markdown": [
"### 开始执行任务ID 2"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/markdown": [
"---"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2025-10-30 13:57:49\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:49\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1mRSS数据处理器初始化完成\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:49\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m开始处理RSS数据...\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载 6 条未处理的RSS数据\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载停用词表,共 98 个词\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载汽车后市场关键词,共 37 个\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Building prefix dict from the default dictionary ...\n",
"Loading model from cache C:\\Users\\zy187\\AppData\\Local\\Temp\\jieba.cache\n",
"Loading model cost 0.839 seconds.\n",
"Prefix dict has been built successfully.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m数据处理完成,共处理 6 条记录\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m过滤出 0 条汽车后市场相关新闻\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:51\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功标记 6 条数据为已处理\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:51\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1mRSS数据处理完成\u001b[0m\n"
]
},
{
"data": {
"text/markdown": [
"**任务名称**: RSS基于规则数据处理"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/markdown": [
"**任务ID**: 2"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/markdown": [
"**执行时长**: 4.41 秒"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/markdown": [
"---"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/markdown": [
"### ✅ 任务执行成功"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'success': True,\n",
" 'task_name': 'RSS基于规则数据处理',\n",
" 'task_id': 2,\n",
" 'execution_time': 4.414557695388794,\n",
" 'output': '',\n",
" 'error': None}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 手动执行任务(异步方式,快速返回)\n",
"def run_task_manually(task_id):\n",
@@ -854,47 +1009,8 @@
" return result\n",
"\n",
"# 执行:手动运行任务ID为2的任务(显示详细执行过程)\n",
"run_task_with_details(3)"
],
"outputs": [
{
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "### 开始执行任务ID 3"
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "---"
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001B[32m2025-10-29 10:30:10\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n",
"\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1mRSS数据AI处理器初始化完成\u001B[0m\n",
"\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1m开始批量处理数据,批次大小: 200, 延迟: 1.5秒\u001B[0m\n",
"\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1m成功加载 3 条未处理的数据\u001B[0m\n"
]
}
],
"execution_count": null
"run_task_with_details(2)"
]
},
{
"cell_type": "markdown",
+52
View File
@@ -0,0 +1,52 @@
import requests
from typing import Optional
class DingAPI():
    """Small HTTP client for the DingTalk open-platform endpoints used here.

    It can request an OAuth access token and create-and-deliver an
    interactive card. All requests go through ``requests.post``.
    """

    # NOTE(security): these application credentials are embedded in source
    # control. They are kept only as a fallback so existing callers keep
    # working; set DINGTALK_CLIENT_ID / DINGTALK_CLIENT_SECRET instead and
    # rotate the committed secret.
    _DEFAULT_CLIENT_ID = "dingn3de1pyuwkymohhe"
    _DEFAULT_CLIENT_SECRET = "qv__egWJnLVXh14_R1rfD_vBi7M8Gzhnk94EJN6puMzsqqpBCP8U7Ow-zA7SV8Rx"

    _TOKEN_URL = 'https://api.dingtalk.com/v1.0/oauth2/dinga88e3d35525b86ca/token'
    _CARD_URL = 'https://api.dingtalk.com/v1.0/card/instances/createAndDeliver'

    # Seconds to wait for the remote side; without a timeout ``requests``
    # may block forever on a stalled connection.
    _TIMEOUT = 10

    def __init__(self):
        # Last access token returned by get_token(); None until one is obtained.
        self.token = None
        # Not read by any method in this class.
        self.url = ''

    def _credentials(self):
        """Return ``(client_id, client_secret)``, preferring environment variables."""
        import os

        client_id = os.environ.get("DINGTALK_CLIENT_ID") or self._DEFAULT_CLIENT_ID
        client_secret = os.environ.get("DINGTALK_CLIENT_SECRET") or self._DEFAULT_CLIENT_SECRET
        return client_id, client_secret

    def get_token(self) -> Optional[str]:
        """
        Request an access token.

        Returns:
            The ``access_token`` field of the JSON response, or None when the
            response carries no such field. A non-empty token is also cached
            on ``self.token``.
        """
        client_id, client_secret = self._credentials()
        payload = {
            "client_id": client_id,
            "client_secret": client_secret,
            "grant_type": "client_credentials"
        }
        response = requests.post(self._TOKEN_URL, json=payload, timeout=self._TIMEOUT)
        token = response.json().get('access_token')
        if token:
            self.token = token
        return token

    def card_create(self, data):
        """
        Create and deliver a card.

        Args:
            data: mapping that must contain ``"token"`` (sent as the access
                token header). Optional ``"outTrackId"`` and ``"cardData"``
                entries are forwarded in the request body; both default to an
                empty string, which is what was always sent before.

        Returns:
            The decoded JSON response (dict).
        """
        headers = {
            'x-acs-dingtalk-access-token': data["token"],
            'Content-Type': 'application/json'
        }
        # Use a separate name for the request body so the caller's ``data``
        # is not shadowed and its optional fields remain reachable.
        payload = {
            "cardTemplateId": "cee2715f-001d-41cb-8fcd-3be18be9fbf5.schema",
            "outTrackId": data.get("outTrackId", ""),
            "cardData": data.get("cardData", ""),
            "openSpaceId": "dtv1.card//IM_GROUP.4210192048793363",  # open-space (scope) id
        }
        response = requests.post(self._CARD_URL, json=payload, headers=headers,
                                 timeout=self._TIMEOUT)
        return response.json()
def get_
+114
View File
@@ -153,6 +153,12 @@ class MySQLAgent:
"""
兼容旧接口的通用插入方法:保留replace参数,同时支持新的ignore_duplicates
自动处理重复数据,对所有数据源通用,插入失败的数据会通过日志记录
安全性说明:
- 使用 INSERT INTO(不是 REPLACE INTO 或 INSERT ... ON DUPLICATE KEY UPDATE
- 当 ignore_duplicates=True 时,重复记录会被跳过,不会覆盖或删除现有数据
- 如果数据库连接失败,操作会抛出异常,不会部分成功
- 所有操作都是安全的,不会导致数据丢失或覆盖
"""
# 【兼容性处理】如果未指定ignore_duplicates,用replace参数推导
if ignore_duplicates is None:
@@ -592,6 +598,114 @@ class MySQLAgent:
exc_info=True)
return False
def create_table_if_not_exists(self, table_name: str, create_sql: str) -> bool:
    """
    Run a CREATE TABLE statement that is expected to be idempotent.

    Args:
        table_name: table name, used only in log output.
        create_sql: full CREATE TABLE statement. A warning is logged when it
            does not contain ``IF NOT EXISTS`` (case-insensitive); the
            statement is executed either way.

    Returns:
        True once ``execute_sql`` has completed without raising.

    Raises:
        Exception: whatever ``execute_sql`` raised, re-raised after logging.
    """
    has_guard = "IF NOT EXISTS" in create_sql.upper()
    if not has_guard:
        self.log.warning(f"CREATE TABLE 语句建议使用 IF NOT EXISTS 以保证安全性")

    try:
        self.execute_sql(create_sql)
        self.log.info(f"成功创建/检查表(表已存在时不会删除数据): {table_name}")
    except Exception as exc:
        self.log.error(
            f"创建/检查表失败(可能是数据库连接问题): {str(exc)}",
            table=table_name,
            exc_info=True,
        )
        raise
    else:
        return True
def add_unique_index_if_not_exists(self, table_name: str, index_name: str,
                                   column_name: str, column_length: int = 500,
                                   check_duplicates: bool = True) -> bool:
    """
    Add a unique index on one column unless an index of that name exists.

    Only metadata/SELECT queries and a single ``ALTER TABLE ... ADD UNIQUE
    KEY`` are issued; no rows are modified.

    Args:
        table_name: table to alter.
        index_name: name of the unique index.
        column_name: column to index.
        column_length: prefix length for the key part. Pass 0/None to index
            the whole column (needed for non-string columns, where a prefix
            length is not accepted).
        check_duplicates: when True, look for values that would violate the
            index before attempting to add it.

    Returns:
        True if the index already exists or was added; False if duplicate
        values were found and the index was therefore not added.

    Raises:
        Exception: any error from the queries or the ALTER statement, except
            a "duplicate key name"/"already exists" error, which is treated
            as success.
    """
    def _quote(identifier: str) -> str:
        # Accept both bare and already back-quoted names; escape embedded backticks.
        return "`" + identifier.strip("`").replace("`", "``") + "`"

    table_sql = _quote(table_name)
    index_sql = _quote(index_name)
    column_sql = _quote(column_name)

    if column_length:
        prefix = int(column_length)
        key_part = f"{column_sql}({prefix})"
        # Compare exactly what the prefix index will compare, so values that
        # differ only beyond the prefix are detected as conflicts up front.
        compare_expr = f"LEFT({column_sql}, {prefix})"
    else:
        key_part = column_sql
        compare_expr = column_sql

    try:
        # 1. Does an index with this name already exist on the table?
        check_index_sql = """
            SELECT COUNT(*) as cnt
            FROM INFORMATION_SCHEMA.STATISTICS
            WHERE TABLE_SCHEMA = %s
              AND TABLE_NAME = %s
              AND INDEX_NAME = %s
        """
        result = self.query_to_df(
            check_index_sql,
            params=(self.config['database'], table_name, index_name),
            is_print=False
        )
        if not result.empty and result['cnt'].iloc[0] > 0:
            self.log.debug(f"唯一索引 {index_name} 已存在,跳过添加")
            return True

        # 2. Optionally look for rows that would violate the index.
        #    NULLs are skipped (a unique index allows many NULLs), but empty
        #    strings are NOT skipped: repeated '' values do violate it.
        if check_duplicates:
            check_duplicates_sql = f"""
                SELECT {compare_expr} AS dup_key, COUNT(*) as cnt
                FROM {table_sql}
                WHERE {column_sql} IS NOT NULL
                GROUP BY dup_key
                HAVING cnt > 1
                LIMIT 1
            """
            duplicates = self.query_to_df(check_duplicates_sql, is_print=False)
            if not duplicates.empty:
                self.log.warning(
                    f"表 {table_name} 中存在重复的 {column_name} 数据,无法添加唯一索引。"
                    "现有数据不会被删除。",
                    duplicate_count=len(duplicates)
                )
                return False

        # 3. Add the unique index.
        add_index_sql = f"""
            ALTER TABLE {table_sql}
            ADD UNIQUE KEY {index_sql} ({key_part})
        """
        self.execute_sql(add_index_sql)
        self.log.info(f"成功添加唯一索引 {index_name}(现有数据不受影响)")
        return True

    except Exception as e:
        error_msg = str(e)
        # The index appearing between the check and the ALTER is not an error.
        if "Duplicate key name" in error_msg or "already exists" in error_msg.lower():
            self.log.debug(f"唯一索引 {index_name} 已存在,跳过添加")
            return True
        else:
            self.log.warning(f"添加唯一索引时出现问题(不影响现有数据): {error_msg}")
            raise
def execute_sql(self, sql: str, params: Union[tuple, dict, None] = None,
fetch: bool = False) -> Union[int, List[Dict[str, Any]]]:
"""执行SQL语句(原有逻辑完全保留)"""