﻿import os
import re
import json
from datetime import datetime,timezone,timedelta
from dateutil import tz
import feedparser
import html
import boto3

# boto3 client setup (all clients are created once per execution environment)
REGION = 'ap-northeast-1'
translate = boto3.client('translate', region_name=REGION)
bedrock = boto3.client('bedrock-runtime', region_name=REGION)
ssm = boto3.client('ssm', region_name=REGION)
ses = boto3.client('ses', region_name=REGION)

# Source RSS settings
THRESHOLD = 4    # Look-back window in hours: entries published within this window are notified

# SES notification settings (sender and per-language recipients, read from the environment)
MAIL_FROM = os.environ['MAIL_FROM']
MAIL_DEST_JA = os.environ['MAIL_DEST_JA']
MAIL_DEST_EN = os.environ['MAIL_DEST_EN']

def lambda_handler(event, context):
    """Lambda entry point: process every registered RSS feed and report the total.

    The feed list is read as JSON from the Systems Manager Parameter Store
    entry whose name is given by the ``PARAMETER_STORE`` environment variable.
    Each element of that list is handed to ``fetchrss``.

    Args:
        event: Lambda event payload (not used).
        context: Lambda context object (not used).

    Returns:
        A dict with ``statusCode`` 200 and a JSON ``body`` of the form
        ``{"Articles": <number of notifications sent>}``.
    """
    # Load the feed definitions from Parameter Store
    parameter_name = os.environ['PARAMETER_STORE']
    lookup = ssm.get_parameters(Names=[parameter_name])
    feeds = json.loads(lookup['Parameters'][0]['Value'])

    # Walk through every registered feed and total the notifications sent
    posted = sum(fetchrss(feed) for feed in feeds)

    # Wrap up
    print(posted,"Articles Posted")
    return {
        'statusCode': 200,
        'body': json.dumps({'Articles': posted}),
    }

# Matches any HTML/XML tag; used to reduce a feed summary to plain text.
_TAG_PATTERN = re.compile(r"<[^>]*?>")


def _build_ja_mail(rss, entry):
    """Return (subject, text body, HTML body) for an entry of a Japanese feed."""
    subject = entry.title
    summary = get_summary(entry.link)

    sesmessage = "\n【" + rss['TITLE'] + "】\n" \
        + summary + "\n\n" \
        + "記事リンク\n" + entry.link

    # The feed's own summary is embedded as markup, so it is deliberately not
    # escaped; every plain-text value is escaped before going into the HTML.
    # NOTE(review): the text part carries the generated summary while the HTML
    # part carries the feed summary -- confirm this difference is intended.
    seshtml = """<html>
                        <head></head>
                        <body><br>
                        <p>【{feed_title}】</p>
                        <h2><a href="{link}"> {subject}</a></h2>
                        {summary}
                        </body></html>
                    """.format(
        feed_title=html.escape(rss['TITLE']),
        link=html.escape(entry.link),
        subject=html.escape(subject),
        summary=entry.get('summary', ''),
    )
    return subject, sesmessage, seshtml


def _build_translated_mail(rss, entry):
    """Return (subject, text body, HTML body) for an entry of a non-Japanese feed."""
    orgsubject = entry.title
    # Strip tags first, then decode entities, to get a plain-text summary
    orgsummary = html.unescape(_TAG_PATTERN.sub("", entry.get('summary', '')))
    subject = get_translate_text(orgsubject,rss['LANG'])
    summary = get_summary(entry.link)

    sesmessage = "\n【" + rss['TITLE'] + "】\n" \
        + summary + "\n\n" \
        + "記事リンク\n" + entry.link + "\n\n" \
        + "(Original Article)------------------" + "\n" \
        + orgsubject + "\n\n" \
        + orgsummary

    # All substituted values are plain text here, so each one is escaped to
    # keep characters such as "<" or "&" from being interpreted as markup.
    seshtml = """<html>
                        <head></head>
                        <body><br>
                        <p>【{feed_title}】</p>
                        <h2><a href="{link}"> {subject}</a></h2>
                        <p>{summary}</p>
                        <p>(Original Article)------------------</p>
                        <h3>{orgsubject}</h3>
                        {orgsummary}
                        </body></html>
                    """.format(
        feed_title=html.escape(rss['TITLE']),
        link=html.escape(entry.link),
        subject=html.escape(subject),
        summary=html.escape(summary),
        orgsubject=html.escape(orgsubject),
        orgsummary=html.escape(orgsummary),
    )
    return subject, sesmessage, seshtml


def fetchrss(rss):
    """Parse one RSS feed and e-mail every entry published within the look-back window.

    Args:
        rss: Feed definition with the keys ``'TITLE'`` (display name),
            ``'URL'`` (feed address) and ``'LANG'`` (language code; ``"ja"``
            entries are sent untranslated, anything else has its title
            translated).

    Returns:
        The number of entries for which a mail was posted.

    Raises:
        Exception: Any error raised while fetching, building or sending is
            printed and then re-raised unchanged.
    """
    count = 0
    print("処理開始:",rss['TITLE'])

    try:
        # Fetch and parse the target feed
        feed = feedparser.parse(rss['URL'])

        # One cut-off for the whole feed: entries newer than this are notified
        threshold = datetime.now(timezone.utc) - timedelta(hours=THRESHOLD)

        for entry in feed.entries:
            # An entry without a publication time cannot be compared against
            # the window; skip it instead of aborting the whole feed.
            published = entry.get('published_parsed')
            if not published:
                print("--skip (no published date):", entry.get('link', ''))
                continue

            # Convert the time.struct_time into an aware datetime
            published_date = datetime(*published[:6], tzinfo=timezone.utc)
            if published_date <= threshold:
                continue

            # Japanese feeds are sent as-is; other languages get a translated subject
            if rss['LANG'] == "ja":
                subject, sesmessage, seshtml = _build_ja_mail(rss, entry)
            else:
                subject, sesmessage, seshtml = _build_translated_mail(rss, entry)

            print("記事:", subject)
            print("--post SES")
            response = post_email(subject,sesmessage,seshtml,rss['LANG'])
            print(response)
            count += 1

    except Exception as e:
        print(e)
        raise

    return count

def get_summary(link):
    """Request a summary of the article at *link* from Amazon Bedrock.

    Sends a single prompt containing the URL to the
    ``amazon.titan-text-express-v1`` model and returns the first result's
    generated text. The generated text is also printed.

    NOTE(review): only the URL is placed in the prompt; this function does not
    download the article itself -- confirm the model can produce a meaningful
    summary from a bare URL.

    Args:
        link: URL of the article to summarize.

    Returns:
        The ``outputText`` of the first entry in the model's ``results``.
    """
    modelId = 'amazon.titan-text-express-v1'
    accept = 'application/json'
    contentType = 'application/json'
    prompt = "あなたはAWSのアーキテクトです。このURL(" + link + ")にアクセスして内容を要約してください"

    # The original literal lacked the comma after "inputText", which is a
    # SyntaxError and prevented the whole module from being imported.
    body = json.dumps({
        "inputText": prompt,
        "textGenerationConfig": {
            "maxTokenCount": 4000,
        },
    })

    response = bedrock.invoke_model(
        body=body,
        modelId=modelId,
        accept=accept,
        contentType=contentType,
    )
    response_body = json.loads(response.get('body').read())

    # Generated text of the first result
    summary = response_body['results'][0]['outputText']
    print(summary)

    return summary

def get_translate_text(text,lang):
    """Translate *text* from *lang* into Japanese with Amazon Translate.

    Args:
        text: Text to translate.
        lang: Source language code passed to Amazon Translate.

    Returns:
        The translated text, or an empty string when *text* is empty or None.
    """
    # Amazon Translate rejects empty input, so there is nothing to send;
    # return an empty string so callers can still concatenate the result.
    if not text:
        return ""

    response = translate.translate_text(
        Text = text,
        SourceLanguageCode = lang,
        TargetLanguageCode = 'ja'
    )
    return response['TranslatedText']
    
def post_email(subject,body,html,lang):
    """Send one notification mail through Amazon SES.

    Args:
        subject: Mail subject.
        body: Plain-text body.
        html: HTML body.
        lang: Feed language code; ``"ja"`` goes to ``MAIL_DEST_JA``, anything
            else to ``MAIL_DEST_EN``.

    Returns:
        The HTTP status code from the SES response metadata.
    """
    destination = MAIL_DEST_JA if lang=="ja" else MAIL_DEST_EN

    # SES treats content as 7-bit ASCII unless a charset is given; the subject
    # and bodies contain Japanese text, so declare UTF-8 on every part.
    charset = 'UTF-8'

    response = ses.send_email(
        Source = MAIL_FROM,
        Destination = {'ToAddresses' : [destination]},
        Message={
            'Subject':{'Data': subject, 'Charset': charset},
            'Body':{
                'Text':{'Data' : body, 'Charset': charset},
                'Html':{'Data' : html, 'Charset': charset}
            }
        }
    )
    return response['ResponseMetadata']['HTTPStatusCode']
