From a24e0162e5af937b3e0430c1175aa338f8ab1bf2 Mon Sep 17 00:00:00 2001 From: guyskk Date: Wed, 28 Aug 2024 12:10:07 +0800 Subject: [PATCH 1/8] add user profile and refresh_vip_info --- .../migrations/0035_auto_20240828_0341.py | 39 ++++++++ rssant_api/models/__init__.py | 2 + rssant_api/models/user_profile.py | 91 +++++++++++++++++++ rssant_api/views/ezrevenue.py | 14 +-- rssant_cli/user.py | 43 +++++++++ 5 files changed, 178 insertions(+), 11 deletions(-) create mode 100644 rssant_api/migrations/0035_auto_20240828_0341.py create mode 100644 rssant_api/models/user_profile.py diff --git a/rssant_api/migrations/0035_auto_20240828_0341.py b/rssant_api/migrations/0035_auto_20240828_0341.py new file mode 100644 index 0000000..453a341 --- /dev/null +++ b/rssant_api/migrations/0035_auto_20240828_0341.py @@ -0,0 +1,39 @@ +# Generated by Django 2.2.28 on 2024-08-28 03:41 + +from django.conf import settings +import django.contrib.postgres.fields.jsonb +from django.db import migrations, models +import django.db.models.deletion +import ool + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('rssant_api', '0034_auto_20240821_0736'), + ] + + operations = [ + migrations.CreateModel( + name='UserProfile', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('_version', ool.VersionField(default=0)), + ('_created', models.DateTimeField(auto_now_add=True, help_text='创建时间')), + ('_updated', models.DateTimeField(auto_now=True, help_text='更新时间')), + ('vip_balance', models.BigIntegerField(blank=True, null=True, verbose_name='会员余额')), + ('vip_info', django.contrib.postgres.fields.jsonb.JSONField(blank=True, null=True, verbose_name='会员信息')), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + bases=(ool.VersionedMixin, models.Model), + ), + migrations.AddIndex( + model_name='userprofile', + index=models.Index(fields=['user'], name='rssant_api__user_id_d19d1d_idx'), + ), + migrations.AddConstraint( + model_name='userprofile', + constraint=models.UniqueConstraint(fields=('user',), name='userprofile_unique_user'), + ), + ] diff --git a/rssant_api/models/__init__.py b/rssant_api/models/__init__.py index 7e96cc6..1e31c3c 100644 --- a/rssant_api/models/__init__.py +++ b/rssant_api/models/__init__.py @@ -8,6 +8,7 @@ from .story_info import StoryId, StoryInfo from .story_service import STORY_SERVICE, CommonStory from .union_feed import FeedImportItem, FeedUnionId, UnionFeed from .union_story import StoryUnionId, UnionStory +from .user_profile import UserProfile from .user_publish import UserPublish from .worker_task import WorkerTask @@ -23,6 +24,7 @@ __models__ = ( FeedStoryStat, Registery, ImageInfo, + UserProfile, UserPublish, WorkerTask, ) diff --git a/rssant_api/models/user_profile.py b/rssant_api/models/user_profile.py new file mode 100644 index 0000000..0721e7d --- /dev/null +++ b/rssant_api/models/user_profile.py @@ -0,0 +1,91 @@ +import time +from typing import Optional + +from django.contrib.auth.models import AbstractUser + +from rssant_common.ezrevenue import EZREVENUE_CLIENT + +from .helper import JSONField, Model, User, models, optional + + +class UserProfile(Model): + """用户关联信息""" + + class Meta: + indexes = [ + models.Index(fields=['user']), + ] + constraints = [ + models.UniqueConstraint(fields=['user'], name='userprofile_unique_user'), + ] + + class Admin: + display_fields = ['user', 'vip_balance'] + + user = models.ForeignKey(User, on_delete=models.CASCADE) + vip_balance: int = models.BigIntegerField(**optional, verbose_name='会员余额') + vip_info: dict = JSONField(**optional, verbose_name='会员信息') + + def is_vip(self, now=None): + if now is None: + now = int(time.time()) + # 会员余额为None可能是未同步会员信息,当作会员处理 + if self.vip_balance is None: + return True + if self.vip_balance >= now: + return True + return False + + @classmethod + def _get_impl( + cls, + *, + user_id: int = None, + ) -> Optional["UserProfile"]: + q = UserProfile.objects.filter(user_id=user_id) + result = q.seal().first() + return result + + @classmethod + def get(cls, *, user_id: int): + return cls._get_impl(user_id=user_id) + + @classmethod + def is_vip_user(self, user: AbstractUser): + profile = self.get(user_id=user.id) + # 未同步会员信息,当作会员处理 + if profile is None: + return True + return profile.is_vip() + + @classmethod + def refresh_vip_info(cls, user: AbstractUser): + params = dict( + paywall_alias='paywall_vip', + customer=dict( + external_id=user.id, + nickname=user.username, + external_dt_created=user.date_joined.isoformat(), + ), + include_balance=True, + ) + vip_info = EZREVENUE_CLIENT.call('customer.info', params) + vip_balance = cls._get_vip_balance(vip_info) + profile, _ = UserProfile.objects.update_or_create( + dict( + user_id=user.id, + vip_balance=vip_balance, + vip_info=vip_info, + ), + user_id=user.id, + ) + return profile + + @classmethod + def _get_vip_balance(cls, vip_info: dict): + vip_equity_alias = 'equity_vip' + for item in vip_info['balance_s']: + equity_alias = item['equity']['alias'] + if equity_alias == vip_equity_alias: + return item['balance'] + return None diff --git a/rssant_api/views/ezrevenue.py b/rssant_api/views/ezrevenue.py index 81def80..24914ad 100644 --- a/rssant_api/views/ezrevenue.py +++ b/rssant_api/views/ezrevenue.py @@ -2,6 +2,7 @@ from django.contrib.auth.models import AbstractUser from rest_framework.response import Response from django_rest_validr import RestRouter, T +from rssant_api.models.user_profile import UserProfile from rssant_common.ezrevenue import EZREVENUE_CLIENT EzrevenueView = RestRouter() @@ -10,18 +11,9 @@ EzrevenueView = RestRouter() @EzrevenueView.post('ezrevenue/customer.info') def ezrevenue_customer_info( request, - include_balance: T.bool.default(True), ) -> T.dict: if not EZREVENUE_CLIENT: return Response(status=501) user: AbstractUser = request.user - params = dict( - paywall_alias='paywall_vip', - customer=dict( - external_id=user.id, - nickname=user.username, - external_dt_created=user.date_joined.isoformat(), - ), - include_balance=include_balance, - ) - return EZREVENUE_CLIENT.call('customer.info', params) + profile = UserProfile.refresh_vip_info(user=user) + return profile.vip_info diff --git a/rssant_cli/user.py b/rssant_cli/user.py index 460bf7b..ca62f3e 100644 --- a/rssant_cli/user.py +++ b/rssant_cli/user.py @@ -1,8 +1,15 @@ import logging +import typing +from concurrent.futures import Future, ThreadPoolExecutor +from queue import Empty as QueueEmpty +from queue import Queue import click +from django.contrib.auth import get_user_model +from tqdm import tqdm import rssant_common.django_setup # noqa:F401 +from rssant_api.models import UserProfile LOG = logging.getLogger(__name__) @@ -12,5 +19,41 @@ def main(): """User Commands""" +def _run_refresh_vip_info(queue: Queue, progress: tqdm): + while True: + try: + user = queue.get(block=False) + except QueueEmpty: + return + UserProfile.refresh_vip_info(user=user) + progress.update(1) + + +@click.option('--user-id', type=int, required=False, help='User ID') +@main.command() +def refresh_vip_info(user_id: typing.Optional[int] = None): + User = get_user_model() + if user_id is not None: + user_s = User.objects.filter(id=user_id).all() + else: + user_s = User.objects.all() + LOG.info(f'refresh_vip_info user count={len(user_s)}') + queue = Queue() + for user in user_s: + queue.put(user) + progress = tqdm(total=queue.qsize(), ascii=True, ncols=80) + pool = ThreadPoolExecutor(max_workers=20) + try: + fut_s: typing.List[Future] = [] + for _ in range(20): + fut = pool.submit(_run_refresh_vip_info, queue, progress) + fut_s.append(fut) + for fut in fut_s: + fut.result() + finally: + progress.close() + pool.shutdown(wait=True) + + if __name__ == "__main__": main() -- Gitee From d044f1f74093ab671fd4ade9287c7b6212155de8 Mon Sep 17 00:00:00 2001 From: guyskk Date: Wed, 28 Aug 2024 14:35:36 +0800 Subject: [PATCH 2/8] pg count verify ignore rssant_api_workertask --- rssant_harbor/pg_count.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rssant_harbor/pg_count.py b/rssant_harbor/pg_count.py index 6071a92..0ff0b26 100644 --- a/rssant_harbor/pg_count.py +++ b/rssant_harbor/pg_count.py @@ -78,6 +78,11 @@ def pg_count(): return dict(tables=ret_tables) +_PG_VERIFY_IGNORE_TABLE_SET = { + 'rssant_api_workertask', +} + + def pg_verify(result, expect_result, bias): result_map = {} for item in result['tables']: @@ -86,6 +91,8 @@ def pg_verify(result, expect_result, bias): details = [] for expect_item in expect_result['tables']: name = expect_item['name'] + if name in _PG_VERIFY_IGNORE_TABLE_SET: + continue expect_count = expect_item['count'] count = result_map.get(name) if count is None: -- Gitee From cf02587a3b62469b1342aac770828baed59a125b Mon Sep 17 00:00:00 2001 From: guyskk Date: Wed, 28 Aug 2024 14:45:28 +0800 Subject: [PATCH 3/8] add userprofile dt_vip_synced --- ...uto_20240828_0341.py => 0035_auto_20240828_0643.py} | 3 ++- rssant_api/models/user_profile.py | 6 +++++- rssant_api/views/ezrevenue.py | 2 +- rssant_cli/user.py | 10 +++++----- 4 files changed, 13 insertions(+), 8 deletions(-) rename rssant_api/migrations/{0035_auto_20240828_0341.py => 0035_auto_20240828_0643.py} (90%) diff --git a/rssant_api/migrations/0035_auto_20240828_0341.py b/rssant_api/migrations/0035_auto_20240828_0643.py similarity index 90% rename from rssant_api/migrations/0035_auto_20240828_0341.py rename to rssant_api/migrations/0035_auto_20240828_0643.py index 453a341..a5f341d 100644 --- a/rssant_api/migrations/0035_auto_20240828_0341.py +++ b/rssant_api/migrations/0035_auto_20240828_0643.py @@ -1,4 +1,4 @@ -# Generated by Django 2.2.28 on 2024-08-28 03:41 +# Generated by Django 2.2.28 on 2024-08-28 06:43 from django.conf import settings import django.contrib.postgres.fields.jsonb @@ -24,6 +24,7 @@ class Migration(migrations.Migration): ('_updated', models.DateTimeField(auto_now=True, help_text='更新时间')), ('vip_balance', models.BigIntegerField(blank=True, null=True, verbose_name='会员余额')), ('vip_info', django.contrib.postgres.fields.jsonb.JSONField(blank=True, null=True, verbose_name='会员信息')), + ('dt_vip_synced', models.DateTimeField(blank=True, help_text='会员信息同步时间', null=True)), ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), ], bases=(ool.VersionedMixin, models.Model), diff --git a/rssant_api/models/user_profile.py b/rssant_api/models/user_profile.py index 0721e7d..5126026 100644 --- a/rssant_api/models/user_profile.py +++ b/rssant_api/models/user_profile.py @@ -3,6 +3,7 @@ from typing import Optional from django.contrib.auth.models import AbstractUser +from rssant_common import timezone from rssant_common.ezrevenue import EZREVENUE_CLIENT from .helper import JSONField, Model, User, models, optional @@ -25,6 +26,7 @@ class UserProfile(Model): user = models.ForeignKey(User, on_delete=models.CASCADE) vip_balance: int = models.BigIntegerField(**optional, verbose_name='会员余额') vip_info: dict = JSONField(**optional, verbose_name='会员信息') + dt_vip_synced = models.DateTimeField(**optional, help_text="会员信息同步时间") def is_vip(self, now=None): if now is None: @@ -59,7 +61,7 @@ class UserProfile(Model): return profile.is_vip() @classmethod - def refresh_vip_info(cls, user: AbstractUser): + def sync_vip_info(cls, user: AbstractUser): params = dict( paywall_alias='paywall_vip', customer=dict( @@ -71,11 +73,13 @@ class UserProfile(Model): ) vip_info = EZREVENUE_CLIENT.call('customer.info', params) vip_balance = cls._get_vip_balance(vip_info) + dt_vip_synced = timezone.now() profile, _ = UserProfile.objects.update_or_create( dict( user_id=user.id, vip_balance=vip_balance, vip_info=vip_info, + dt_vip_synced=dt_vip_synced, ), user_id=user.id, ) diff --git a/rssant_api/views/ezrevenue.py b/rssant_api/views/ezrevenue.py index 24914ad..3d78685 100644 --- a/rssant_api/views/ezrevenue.py +++ b/rssant_api/views/ezrevenue.py @@ -15,5 +15,5 @@ def ezrevenue_customer_info( if not EZREVENUE_CLIENT: return Response(status=501) user: AbstractUser = request.user - profile = UserProfile.refresh_vip_info(user=user) + profile = UserProfile.sync_vip_info(user=user) return profile.vip_info diff --git a/rssant_cli/user.py b/rssant_cli/user.py index ca62f3e..bbe18b2 100644 --- a/rssant_cli/user.py +++ b/rssant_cli/user.py @@ -19,25 +19,25 @@ def main(): """User Commands""" -def _run_refresh_vip_info(queue: Queue, progress: tqdm): +def _run_sync_vip_info(queue: Queue, progress: tqdm): while True: try: user = queue.get(block=False) except QueueEmpty: return - UserProfile.refresh_vip_info(user=user) + UserProfile.sync_vip_info(user=user) progress.update(1) @click.option('--user-id', type=int, required=False, help='User ID') @main.command() -def refresh_vip_info(user_id: typing.Optional[int] = None): +def sync_vip_info(user_id: typing.Optional[int] = None): User = get_user_model() if user_id is not None: user_s = User.objects.filter(id=user_id).all() else: user_s = User.objects.all() - LOG.info(f'refresh_vip_info user count={len(user_s)}') + LOG.info(f'sync_vip_info user count={len(user_s)}') queue = Queue() for user in user_s: queue.put(user) @@ -46,7 +46,7 @@ def refresh_vip_info(user_id: typing.Optional[int] = None): try: fut_s: typing.List[Future] = [] for _ in range(20): - fut = pool.submit(_run_refresh_vip_info, queue, progress) + fut = pool.submit(_run_sync_vip_info, queue, progress) fut_s.append(fut) for fut in fut_s: fut.result() -- Gitee From 72a8a5b2c1f86c6bbc0d52bb908988cffe704207 Mon Sep 17 00:00:00 2001 From: guyskk Date: Wed, 28 Aug 2024 21:53:37 +0800 Subject: [PATCH 4/8] update feed refresh freeze level logic --- rssant_api/models/feed.py | 169 ++++++++++++++++++++++++++------------ 1 file changed, 116 insertions(+), 53 deletions(-) diff --git a/rssant_api/models/feed.py b/rssant_api/models/feed.py index 9dd4925..2ed082b 100644 --- a/rssant_api/models/feed.py +++ b/rssant_api/models/feed.py @@ -124,12 +124,8 @@ class Feed(Model, ContentHashMixin): **optional, help_text="HTTP response header Last-Modified", ) - content_length = models.IntegerField( - **optional, help_text='length of content' - ) - response_status = models.IntegerField( - **optional, help_text='response status code' - ) + content_length = models.IntegerField(**optional, help_text='length of content') + response_status = models.IntegerField(**optional, help_text='response status code') # 其他 monthly_story_count_data = models.BinaryField( **optional, max_length=514, help_text="monthly story count data" @@ -243,9 +239,7 @@ class Feed(Model, ContentHashMixin): return [x['feed_id'] for x in feeds] @staticmethod - def take_outdated_feeds( - outdate_seconds=300, timeout_seconds=None, limit=300 - ): + def take_outdated_feeds(outdate_seconds=300, timeout_seconds=None, limit=300): """ outdate_seconds: 正常检查时间间隔 timeout_seconds: 异常检查时间间隔 @@ -383,13 +377,39 @@ class Feed(Model, ContentHashMixin): @staticmethod def refresh_freeze_level(): """ - 活跃用户: 90天内有阅读记录 - 冻结策略: - 1. 无人订阅,冻结1个月。有人订阅时解冻。 - 2. 创建时间>=7天,且2年无更新,冻结1个月。有更新时解冻。 - 3. 创建时间>=7天,且没有任何内容,冻结7天。有更新时解冻。 - 4. 无活跃用户订阅,冻结3天。有活跃用户订阅时解冻。 - 5. 其余订阅参照冻结时间表格。 + 冻结级别 = 内容系数 x 用户系数 + 阅读系数 + + 冻结逻辑: + - 周更、月更博客,减少更新 + - 没有会员订阅的,不要更新 + - 有会员但不活跃的,减少更新 + - 内容没人阅读的,不要更新 + + 解冻逻辑: + - 订阅有内容更新时解冻 + - 有用户创建订阅时解冻 + - 定时任务更新冻结级别 + + 内容系数:基础冻结时间,根据更新频率、内容大小确定 + - 创建时间>=7天,且2年无更新,冻结1个月。有更新时解冻。 + - 创建时间>=7天,且没有任何内容,冻结7天。有更新时解冻。 + - 其余订阅参照冻结时间表格。 + + 用户系数:根据用户是否是会员、是否活跃确定(无会员信息的当作会员处理) + - 有会员订阅+有活跃用户订阅 x1 + - 有会员订阅 x2 + - 有7天内活跃用户订阅 x4 + - 有用户订阅 x24 + - 其他 x120 + + 阅读系数:根据订阅阅读记录确定 + - 1天内有阅读 +0 + - 2天内有阅读 +1 + - 7天内有阅读 +3 + - 30天内有阅读 +8 + - 90天内有阅读 +12 + - 其他 +120 + 统计数据: - 90%的订阅小于300KB - 99%的订阅小于1500KB @@ -405,14 +425,60 @@ class Feed(Model, ContentHashMixin): +------------+----------+------------+----------+ """ # https://stackoverflow.com/questions/7869592/how-to-do-an-update-join-in-postgresql - sql = f""" - WITH t AS ( + sql = """ + WITH user_stat AS ( + SELECT + myuser.id AS user_id, + CASE WHEN user_vip.is_vip IS NULL THEN 1 ELSE user_vip.is_vip END AS is_vip, + user_active.is_active + FROM auth_user AS myuser + LEFT OUTER JOIN ( + SELECT user_id, CASE WHEN ( + vip_balance > EXTRACT(epoch FROM NOW()) + ) THEN 1 ELSE 0 END AS is_vip + FROM rssant_api_userprofile + ) AS user_vip ON myuser.id = user_vip.user_id + LEFT OUTER JOIN ( + SELECT user_id, CASE WHEN ( + MAX(dt_updated) >= NOW() - INTERVAL '7 days' + ) THEN 1 ELSE 0 END AS is_active + FROM rssant_api_userfeed GROUP BY user_id + ) AS user_active ON myuser.id = user_active.user_id + ), + feed_user_stat AS ( + SELECT + feed_id, + COUNT(1) AS user_count, + SUM(user_stat.is_vip) AS vip_user_count, + SUM(user_stat.is_active) AS active_user_count + FROM rssant_api_userfeed JOIN user_stat + ON rssant_api_userfeed.user_id = user_stat.user_id + GROUP BY feed_id + ), + feed_read_stat AS ( + SELECT + feed_id, + CASE WHEN ( + MAX(dt_updated) >= NOW() - INTERVAL '1 days' + ) THEN 1 ELSE 0 END AS is_read_1d, + CASE WHEN ( + MAX(dt_updated) >= NOW() - INTERVAL '2 days' + ) THEN 1 ELSE 0 END AS is_read_2d, + CASE WHEN ( + MAX(dt_updated) >= NOW() - INTERVAL '7 days' + ) THEN 1 ELSE 0 END AS is_read_7d, + CASE WHEN ( + MAX(dt_updated) >= NOW() - INTERVAL '30 days' + ) THEN 1 ELSE 0 END AS is_read_30d, + CASE WHEN ( + MAX(dt_updated) >= NOW() - INTERVAL '90 days' + ) THEN 1 ELSE 0 END AS is_read_90d + FROM rssant_api_userfeed GROUP BY feed_id + ), + feed_stat AS ( SELECT feed.id AS id, CASE - WHEN ( - feed_stat.feed_id is NULL OR feed_stat.user_count <= 0 - ) THEN 31 * 24 WHEN ( (feed.dt_created <= NOW() - INTERVAL '7 days') AND (feed.dt_latest_story_published <= NOW() - INTERVAL '2 years') @@ -421,9 +487,6 @@ class Feed(Model, ContentHashMixin): (feed.dt_created <= NOW() - INTERVAL '7 days') AND (feed.dt_latest_story_published is NULL and total_storys <= 0) ) THEN 7 * 24 - WHEN ( - feed_stat.active_user_count <= 0 - ) THEN 3 * 24 WHEN ( feed.content_length >= 1500 * 1024 AND feed.dryness >= 500 ) THEN 9 @@ -440,24 +503,32 @@ class Feed(Model, ContentHashMixin): feed.dryness >= 500 AND feed.content_length >= 300 * 1024 ) THEN 2 ELSE 1 - END AS freeze_level + END AS base_freeze_level, + CASE WHEN ( + feed_user_stat.vip_user_count > 0 + AND feed_user_stat.active_user_count > 0 + ) THEN 1 + WHEN feed_user_stat.vip_user_count > 0 THEN 2 + WHEN feed_user_stat.active_user_count > 0 THEN 4 + WHEN feed_user_stat.user_count > 0 THEN 24 + ELSE 120 + END AS user_freeze_level, + CASE + WHEN feed_read_stat.is_read_1d > 0 THEN 0 + WHEN feed_read_stat.is_read_2d > 0 THEN 1 + WHEN feed_read_stat.is_read_7d > 0 THEN 3 + WHEN feed_read_stat.is_read_30d > 0 THEN 8 + WHEN feed_read_stat.is_read_90d > 0 THEN 12 + ELSE 120 + END AS read_freeze_level FROM rssant_api_feed AS feed - LEFT OUTER JOIN ( - SELECT - feed_id, - COUNT(1) AS user_count, - SUM(user_stat.is_active) AS active_user_count - FROM rssant_api_userfeed JOIN ( - SELECT user_id, CASE WHEN ( - MAX(dt_updated) >= NOW() - INTERVAL '90 days' - ) THEN 1 ELSE 0 END AS is_active - FROM rssant_api_userfeed GROUP BY user_id - ) user_stat - ON rssant_api_userfeed.user_id = user_stat.user_id - GROUP BY feed_id - ) AS feed_stat - ON feed.id = feed_stat.feed_id - WHERE feed.status != '{FeedStatus.DISCARD}' + LEFT OUTER JOIN feed_user_stat ON feed.id = feed_user_stat.feed_id + LEFT OUTER JOIN feed_read_stat ON feed.id = feed_read_stat.feed_id + WHERE feed.status != 'discard' + ), + t AS ( + SELECT id, base_freeze_level * user_freeze_level + read_freeze_level AS freeze_level + FROM feed_stat ) UPDATE rssant_api_feed AS feed SET freeze_level = t.freeze_level @@ -493,16 +564,12 @@ class RawFeed(Model, ContentHashMixin): **optional, help_text="HTTP response header Last-Modified", ) - headers = JSONField( - **optional, help_text='HTTP response headers, JSON object' - ) + headers = JSONField(**optional, help_text='HTTP response headers, JSON object') is_gzipped = models.BooleanField( **optional, default=False, help_text="is content gzip compressed" ) content = models.BinaryField(**optional) - content_length = models.IntegerField( - **optional, help_text='length of content' - ) + content_length = models.IntegerField(**optional, help_text='length of content') dt_created = models.DateTimeField(auto_now_add=True, help_text="创建时间") def set_content(self, content): @@ -538,15 +605,11 @@ class UserFeed(Model): feed = models.ForeignKey(Feed, on_delete=models.CASCADE, **optional) title = models.CharField(max_length=200, **optional, help_text="用户设置的标题") group = models.CharField(max_length=200, **optional, help_text="用户设置的分组") - story_offset = models.IntegerField( - **optional, default=0, help_text="story offset" - ) + story_offset = models.IntegerField(**optional, default=0, help_text="story offset") is_from_bookmark = models.BooleanField( **optional, default=False, help_text='是否从书签导入' ) - is_publish = models.BooleanField( - **optional, default=False, help_text='是否发布' - ) + is_publish = models.BooleanField(**optional, default=False, help_text='是否发布') dt_created = models.DateTimeField(auto_now_add=True, help_text="创建时间") dt_updated = models.DateTimeField(**optional, help_text="更新时间") -- Gitee From 88cb8631d040f48eb0bc19bcc13d4f067e99f1c0 Mon Sep 17 00:00:00 2001 From: guyskk Date: Thu, 29 Aug 2024 16:39:12 +0800 Subject: [PATCH 5/8] fix pg_count db connection broken --- rssant_harbor/pg_count.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rssant_harbor/pg_count.py b/rssant_harbor/pg_count.py index 0ff0b26..5205c1e 100644 --- a/rssant_harbor/pg_count.py +++ b/rssant_harbor/pg_count.py @@ -1,5 +1,5 @@ import django.apps -from django.db import connection +from django.db import close_old_connections, connection sql_count_limit = ''' SELECT count(*) as row_count @@ -66,6 +66,7 @@ def pg_count(): https://stackoverflow.com/questions/7943233/fast-way-to-discover-the-row-count-of-a-table-in-postgresql https://wiki.postgresql.org/wiki/Count_estimate """ + close_old_connections() models = django.apps.apps.get_models() tables = [m._meta.db_table for m in models] story_volume_tables = get_story_volume_tables() -- Gitee From b957a4fa25c9f31e745360aee69caf17757348ff Mon Sep 17 00:00:00 2001 From: guyskk Date: Wed, 18 Sep 2024 16:18:19 +0800 Subject: [PATCH 6/8] update database pool max_overflow --- rssant/settings/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rssant/settings/settings.py b/rssant/settings/settings.py index 7980887..ccfd2f9 100644 --- a/rssant/settings/settings.py +++ b/rssant/settings/settings.py @@ -129,7 +129,7 @@ else: } # https://github.com/lcd1232/django-postgrespool2 -DATABASE_POOL_ARGS = {'max_overflow': 20, 'pool_size': 15, 'recycle': 300} +DATABASE_POOL_ARGS = {'max_overflow': 40, 'pool_size': 15, 'recycle': 300} # Password validation # https://docs.djangoproject.com/en/2.1/ref/settings/#auth-password-validators -- Gitee From 942a11f20358de3d32d388376de25cc13cb8d6a0 Mon Sep 17 00:00:00 2001 From: guyskk Date: Wed, 18 Sep 2024 17:12:50 +0800 Subject: [PATCH 7/8] update story retention limit --- rssant_api/models/story_service.py | 6 ++++-- rssant_api/models/union_story.py | 2 +- rssant_harbor/harbor_service.py | 1 + rssant_worker/worker_service.py | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/rssant_api/models/story_service.py b/rssant_api/models/story_service.py index 61a86b6..95f021e 100644 --- a/rssant_api/models/story_service.py +++ b/rssant_api/models/story_service.py @@ -171,7 +171,8 @@ class StoryService: def _get_unique_ids(self, feed_id, feed_total_story): unique_ids_map = self._get_unique_ids_by_stat(feed_id) if unique_ids_map is None: - begin_offset = max(0, feed_total_story - 300) + limit = min(CONFIG.feed_story_retention, 300) + begin_offset = max(0, feed_total_story - limit) unique_ids_map = self._get_unique_ids_by_story( feed_id, begin_offset, feed_total_story) return unique_ids_map @@ -291,7 +292,8 @@ class StoryService: tmp_unique_ids[story.unique_id] = story.offset tmp_unique_ids = {y: x for x, y in tmp_unique_ids.items()} new_unique_ids = [] - size = min(len(tmp_unique_ids), 300) + limit = min(CONFIG.feed_story_retention, 300) + size = min(len(tmp_unique_ids), limit) begin_offset = max(0, new_total_storys - size) new_begin_offset = new_total_storys for offset in reversed(range(begin_offset, new_total_storys)): diff --git a/rssant_api/models/union_story.py b/rssant_api/models/union_story.py index e5f0f72..3143b8d 100644 --- a/rssant_api/models/union_story.py +++ b/rssant_api/models/union_story.py @@ -316,7 +316,7 @@ class UnionStory: @classmethod def query_recent_by_user( - cls, user_id, feed_unionids=None, days=14, limit=300, detail=False + cls, user_id, feed_unionids=None, days=14, limit=100, detail=False ): """ Deprecated since 1.4.2, use batch_get_by_feed_offset instead diff --git a/rssant_harbor/harbor_service.py b/rssant_harbor/harbor_service.py index 38ee05e..206bde3 100644 --- a/rssant_harbor/harbor_service.py +++ b/rssant_harbor/harbor_service.py @@ -181,6 +181,7 @@ class HarborService: ): now_sub_30d = now - timezone.timedelta(days=30) # save storys, bulk_save_by_feed has standalone transaction + storys = storys[: CONFIG.feed_story_retention] for s in storys: if not s['dt_updated']: s['dt_updated'] = now diff --git a/rssant_worker/worker_service.py b/rssant_worker/worker_service.py index da1a7d5..a9ce97c 100644 --- a/rssant_worker/worker_service.py +++ b/rssant_worker/worker_service.py @@ -417,7 +417,8 @@ def _parse_found(found, checksum_data_base64=None, is_refresh=False): if checksum_data and (not is_refresh): checksum = FeedChecksum.load(checksum_data) result = FeedParser(checksum=checksum).parse(raw_result) - checksum_data = result.checksum.dump(limit=300) + limit = min(CONFIG.feed_story_retention, 300) + checksum_data = result.checksum.dump(limit=limit) checksum_data_base64 = UrlsafeBase64.encode(checksum_data) num_raw_storys = len(raw_result.storys) warnings = None -- Gitee From 0b0c671ccec7360cc69928851a9977b4d88c6b25 Mon Sep 17 00:00:00 2001 From: guyskk Date: Tue, 24 Dec 2024 18:07:42 +0800 Subject: [PATCH 8/8] handle exproxy pick proxy 503 Service Unavailable --- rssant_common/ezproxy.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/rssant_common/ezproxy.py b/rssant_common/ezproxy.py index 8560ac1..9d3929d 100644 --- a/rssant_common/ezproxy.py +++ b/rssant_common/ezproxy.py @@ -1,3 +1,4 @@ +import logging from threading import Lock from typing import List from urllib.parse import urlparse @@ -8,6 +9,8 @@ from cachetools import TTLCache, cached from rssant_common.chnlist import CHINA_WEBSITE_LIST from rssant_config import CONFIG +LOG = logging.getLogger(__name__) + class EzproxyClient: def __init__(self, base_url: str, apikey: str) -> None: @@ -45,13 +48,20 @@ class EzproxyClient: chain: str = None, count: int = 1, ) -> List[dict]: - result = self._call( - 'proxy.pick', - chain=chain, - seed=seed, - region_s=region_s, - count=count, - ) + try: + result = self._call( + 'proxy.pick', + chain=chain, + seed=seed, + region_s=region_s, + count=count, + ) + except httpx.HTTPStatusError as ex: + # Server error '503 Service Unavailable' + if ex.response.status_code == 503: + LOG.warning(str(ex)) + return [] + raise return result['item_s'] def pick_proxy_url( -- Gitee