From 0f58c596c74f933cb826bd1338c2363db7ff9488 Mon Sep 17 00:00:00 2001
From: chunquansang <916920620@qq.com>
Date: Mon, 20 May 2024 09:51:35 +0800
Subject: [PATCH] first commit

---
 Dockerfile         |  29 +++++++
 main.py            | 211 +++++++++++++++++++++++++++++++++++++++++++++
 pip.conf           |   6 ++
 requirements.txt   |   5 ++
 restart_service.sh |  19 ++++
 wait-for.sh        | 192 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 462 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 main.py
 create mode 100644 pip.conf
 create mode 100644 requirements.txt
 create mode 100755 restart_service.sh
 create mode 100755 wait-for.sh

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..a2b0ab9
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,29 @@
+FROM python:3.7
+
+RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
+
+# Install netcat (wait-for.sh needs `nc` for TCP checks) and vim.
+# update and install share one layer so a cached package index is never reused.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends netcat-openbsd vim \
+    && rm -rf /var/lib/apt/lists/*
+
+# Optional: point pip at a mirror inside China
+COPY pip.conf /root/.pip/pip.conf
+
+# Create the project directory inside the container
+ENV APP_HOME=/home/myproject
+RUN mkdir -p $APP_HOME
+
+WORKDIR $APP_HOME
+
+# Copy the build context (.) into the working directory
+COPY . $APP_HOME
+
+# Upgrade pip
+RUN /usr/local/bin/python -m pip install --upgrade pip
+
+# Install project dependencies
+RUN pip install -r requirements.txt
+
+CMD ["sh", "-c", "$APP_HOME/restart_service.sh & python main.py"]
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..33c60c2
--- /dev/null
+++ b/main.py
@@ -0,0 +1,211 @@
+# main.py
+"""Poll the tp_videos table and download pending videos one at a time."""
+
+import hashlib
+import logging
+import os
+import time
+
+import pymysql
+import requests
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+logging.basicConfig(
+    filename='download.log',
+    level=logging.INFO,
+    format='%(asctime)s [%(levelname)s] %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
+logger = logging.getLogger(__name__)
+
+# (connect, read) timeouts in seconds so a stalled server cannot hang the loop.
+REQUEST_TIMEOUT = (10, 60)
+
+
+def get_file_hash(filename):
+    """Return the SHA-256 hex digest of *filename*, or None if it is not a file."""
+    if not os.path.isfile(filename):
+        return None
+
+    h_sha256 = hashlib.sha256()
+    with open(filename, "rb") as file:
+        # Read in chunks so a large video is never loaded into memory at once.
+        for chunk in iter(lambda: file.read(65536), b""):
+            h_sha256.update(chunk)
+    return h_sha256.hexdigest()
+
+
+def connect_to_mysql():
+    """Open a MySQL connection configured by the MYSQL_* environment variables.
+
+    Returns the open connection, or None when the settings are invalid or the
+    connection attempt fails.
+    """
+    try:
+        connection = pymysql.connect(
+            host=os.getenv('MYSQL_HOST'),
+            # Fall back to the default MySQL port when MYSQL_PORT is unset.
+            port=int(os.getenv('MYSQL_PORT') or 3306),
+            user=os.getenv('MYSQL_USER'),
+            password=os.getenv('MYSQL_PASSWORD'),
+            database=os.getenv('MYSQL_DATABASE'),
+            cursorclass=pymysql.cursors.DictCursor
+        )
+    except (pymysql.Error, ValueError) as e:
+        logger.error(f"Error: {e}")
+        return None
+    return connection if connection.open else None
+
+
+def fetch_oldest_video(connection):
+    """Return the oldest row with download_status = 0, or None."""
+    try:
+        with connection.cursor() as cursor:
+            query = "SELECT * FROM tp_videos WHERE download_status = 0 ORDER BY create_time LIMIT 1;"
+            cursor.execute(query)
+            return cursor.fetchone()
+    except pymysql.Error as e:
+        logger.error(f"Error fetching data: {e}")
+        return None
+
+
+def fetch_same_video(connection, video_hash):
+    """Return the newest row whose video_hash equals *video_hash*, or None."""
+    try:
+        with connection.cursor() as cursor:
+            sql = "SELECT * FROM tp_videos WHERE video_hash = %s ORDER BY id DESC LIMIT 1"
+            cursor.execute(sql, (video_hash,))
+            return cursor.fetchone()
+    except pymysql.Error as e:
+        logger.error(f"Error fetching data: {e}")
+        return None
+
+
+def save_download_success(connection, id_, video_hash, is_same=False):
+    """Set download_status=2 and store *video_hash* on row *id_*.
+
+    video_status becomes 4 when *is_same* is true and 1 otherwise. If the
+    update fails with a database error, the row is set to download_status=3.
+    """
+    video_status = 4 if is_same else 1
+    try:
+        with connection.cursor() as cursor:
+            query = "UPDATE tp_videos SET download_status=2, video_hash=%s, video_status=%s WHERE id=%s;"
+            cursor.execute(query, (video_hash, video_status, id_))
+        connection.commit()
+    except pymysql.Error as e:
+        logger.error(f"更新失败: {e}")
+        # The fallback logs its own failure, so a dead connection cannot
+        # raise out of this handler and stop the polling loop.
+        save_download_error(connection, id_)
+    except Exception as e1:
+        logger.error(f"更新失败: {e1}")
+
+
+def save_download_error(connection, id_):
+    """Set download_status=3 on row *id_*; database errors are logged."""
+    try:
+        with connection.cursor() as cursor:
+            query = "UPDATE tp_videos SET download_status=3 WHERE id=%s;"
+            cursor.execute(query, (id_,))
+        connection.commit()
+    except pymysql.Error as e:
+        logger.error(f"更新失败: {e}")
+
+
+def download_video(video_data):
+    """Stream video_data['ftp_path'] into the file video_data['video_path'].
+
+    Raises:
+        ValueError: video_path, video_name or ftp_path is missing or empty.
+        requests.RequestException: the request failed or did not return 200.
+        IOError: the target file could not be written.
+    """
+    download_path = video_data.get('video_path', '')
+    file_name = video_data.get('video_name', '')
+    ftp_path = video_data.get('ftp_path', '')
+
+    if not (download_path and file_name and ftp_path):
+        # Returning silently here would let the caller record a success.
+        raise ValueError("video_path, video_name and ftp_path are required")
+
+    try:
+        # stream=True keeps the body out of memory; the context manager
+        # releases the HTTP connection even when writing fails.
+        with requests.get(ftp_path, stream=True, timeout=REQUEST_TIMEOUT) as response:
+            if response.status_code != 200:
+                raise requests.HTTPError(f"状态码:{response.status_code}", response=response)
+            with open(download_path, 'wb') as file:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        file.write(chunk)
+        logger.info(f"下载完成: {download_path}")
+    except requests.RequestException as e:
+        logger.error(f"下载失败: {e}")
+        raise
+    except IOError as e:
+        logger.error(f"文件操作失败: {e}")
+        raise
+
+
+def _process_next_video(connection):
+    """Download the oldest pending video and record the outcome.
+
+    Returns True when a row was handled and False when nothing is pending.
+    """
+    video_data = fetch_oldest_video(connection)
+    if not (video_data and video_data.get('id')):
+        return False
+
+    logger.info(str(video_data))
+    try:
+        download_video(video_data)
+    except Exception as e:
+        logger.error(f"保存异常{e}")
+        save_download_error(connection, video_data['id'])
+        return True
+
+    download_path = video_data.get('video_path', '')
+    video_hash = get_file_hash(download_path)
+    is_same = bool(fetch_same_video(connection, video_hash))
+    if is_same:
+        # Another row already has this content, so drop the new copy.
+        try:
+            os.remove(download_path)
+            logger.info(f"删除重复视频:{download_path}")
+        except Exception as e:
+            logger.error(f"删除重复视频失败,原因为:{e}")
+    save_download_success(connection, video_data['id'], video_hash, is_same=is_same)
+    return True
+
+
+def main():
+    """Poll for pending videos until interrupted with Ctrl+C."""
+    try:
+        while True:
+            connection = connect_to_mysql()
+            if not connection:
+                time.sleep(10)
+                continue
+            try:
+                processed = _process_next_video(connection)
+            finally:
+                # One connection is opened per iteration, so close it here
+                # instead of leaking all but the last one.
+                if connection.open:
+                    connection.close()
+
+            if processed:
+                time.sleep(2)
+            else:
+                print("No videos to download. Waiting...")
+                time.sleep(5)
+    except KeyboardInterrupt:
+        logger.info("Stopping the downloader.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pip.conf b/pip.conf
new file mode 100644
index 0000000..c969f0d
--- /dev/null
+++ b/pip.conf
@@ -0,0 +1,6 @@
+[global]
+index-url = https://pypi.tuna.tsinghua.edu.cn/simple
+[install]
+trusted-host = pypi.tuna.tsinghua.edu.cn
+[list]
+format=columns
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0787f93
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+pymysql
+requests
+python-dotenv
+cryptography
+tqdm
\ No newline at end of file
diff --git a/restart_service.sh b/restart_service.sh
new file mode 100755
index 0000000..73cee94
--- /dev/null
+++ b/restart_service.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+while true; do
+    # Every 30 minutes, find the Python process and kill it
+    sleep 1800
+    pids=$(pgrep -f "python main.py")
+    if [ -n "$pids" ]; then
+        echo "Killing Python processes with PIDs: $pids"
+        # Get the current time
+        current_time=$(date "+%Y-%m-%d %H:%M:%S")
+        # Kill the processes one by one
+        for pid in $pids; do
+            kill "$pid"
+            echo "Restart at $current_time" >> log.txt  # Append the message to log.txt
+        done
+    else
+        echo "Python process not found."
+    fi
+done
diff --git a/wait-for.sh b/wait-for.sh
new file mode 100755
index 0000000..d5ab9f5
--- /dev/null
+++ b/wait-for.sh
@@ -0,0 +1,192 @@
+#!/bin/sh
+
+# The MIT License (MIT)
+#
+# Copyright (c) 2017 Eficode Oy
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+VERSION="2.2.4"
+
+set -- "$@" -- "$TIMEOUT" "$QUIET" "$PROTOCOL" "$HOST" "$PORT" "$result"
+TIMEOUT=15
+QUIET=0
+# The protocol to make the request with, either "tcp" or "http"
+PROTOCOL="tcp"
+
+echoerr() {
+  if [ "$QUIET" -ne 1 ]; then printf "%s\n" "$*" 1>&2; fi
+}
+
+usage() {
+  exitcode="$1"
+  cat << USAGE >&2
+Usage:
+  $0 host:port|url [-t timeout] [-- command args]
+  -q | --quiet                        Do not output any status messages
+  -t TIMEOUT | --timeout=timeout      Timeout in seconds, zero for no timeout
+                                      Defaults to 15 seconds
+  -v | --version                      Show the version of this tool
+  -- COMMAND ARGS                     Execute command with args after the test finishes
+USAGE
+  exit "$exitcode"
+}
+
+wait_for() {
+  case "$PROTOCOL" in
+    tcp)
+      if ! command -v nc >/dev/null; then
+        echoerr 'nc command is missing!'
+        exit 1
+      fi
+      ;;
+    http)
+      if ! command -v wget >/dev/null; then
+        echoerr 'wget command is missing!'
+        exit 1
+      fi
+      ;;
+  esac
+
+  TIMEOUT_END=$(($(date +%s) + TIMEOUT))
+
+  while :; do
+    case "$PROTOCOL" in
+      tcp)
+        nc -w 1 -z "$HOST" "$PORT" > /dev/null 2>&1
+        ;;
+      http)
+        wget --timeout=1 --tries=1 -q "$HOST" -O /dev/null > /dev/null 2>&1
+        ;;
+      *)
+        echoerr "Unknown protocol '$PROTOCOL'"
+        exit 1
+        ;;
+    esac
+
+    result=$?
+
+    if [ $result -eq 0 ] ; then
+      if [ $# -gt 7 ] ; then
+        for result in $(seq $(($# - 7))); do
+          result=$1
+          shift
+          set -- "$@" "$result"
+        done
+
+        TIMEOUT=$2 QUIET=$3 PROTOCOL=$4 HOST=$5 PORT=$6 result=$7
+        shift 7
+        exec "$@"
+      fi
+      exit 0
+    fi
+
+    if [ $TIMEOUT -ne 0 -a $(date +%s) -ge $TIMEOUT_END ]; then
+      echo "Operation timed out" >&2
+      exit 1
+    fi
+
+    sleep 1
+  done
+}
+
+while :; do
+  case "$1" in
+    http://*|https://*)
+    HOST="$1"
+    PROTOCOL="http"
+    shift 1
+    ;;
+    *:* )
+    HOST=$(printf "%s\n" "$1"| cut -d : -f 1)
+    PORT=$(printf "%s\n" "$1"| cut -d : -f 2)
+    shift 1
+    ;;
+    -v | --version)
+    echo $VERSION
+    exit
+    ;;
+    -q | --quiet)
+    QUIET=1
+    shift 1
+    ;;
+    -q-*)
+    QUIET=0
+    echoerr "Unknown option: $1"
+    usage 1
+    ;;
+    -q*)
+    QUIET=1
+    result=$1
+    shift 1
+    set -- -"${result#-q}" "$@"
+    ;;
+    -t | --timeout)
+    TIMEOUT="$2"
+    shift 2
+    ;;
+    -t*)
+    TIMEOUT="${1#-t}"
+    shift 1
+    ;;
+    --timeout=*)
+    TIMEOUT="${1#*=}"
+    shift 1
+    ;;
+    --)
+    shift
+    break
+    ;;
+    --help)
+    usage 0
+    ;;
+    -*)
+    QUIET=0
+    echoerr "Unknown option: $1"
+    usage 1
+    ;;
+    *)
+    QUIET=0
+    echoerr "Unknown argument: $1"
+    usage 1
+    ;;
+  esac
+done
+
+if ! [ "$TIMEOUT" -ge 0 ] 2>/dev/null; then
+  echoerr "Error: invalid timeout '$TIMEOUT'"
+  usage 3
+fi
+
+case "$PROTOCOL" in
+  tcp)
+    if [ "$HOST" = "" ] || [ "$PORT" = "" ]; then
+      echoerr "Error: you need to provide a host and port to test."
+      usage 2
+    fi
+    ;;
+  http)
+    if [ "$HOST" = "" ]; then
+      echoerr "Error: you need to provide a host to test."
+      usage 2
+    fi
+    ;;
+esac
+
+wait_for "$@"
\ No newline at end of file