first commit

main
chunquansang 11 months ago
parent d15e5b0546
commit 0f58c596c7

@ -0,0 +1,28 @@
FROM python:3.7

# Use Asia/Shanghai as the container's local time zone.
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

# Install OS packages. `apt-get update` and `apt-get install` share one layer
# so a stale cached package index is never paired with a newer install step,
# and the index is removed in the same layer to keep the image smaller.
# vim is kept for debugging inside the container.
# NOTE(review): netcat is NOT installed here; the tcp mode of the bundled
# wait-for script needs `nc` - add it to this list if that script is used.
RUN apt-get update \
    && apt-get install -y --no-install-recommends vim \
    && rm -rf /var/lib/apt/lists/*

# Optional: point pip at a mainland-China mirror.
COPY pip.conf /root/.pip/pip.conf

# Application directory inside the container (WORKDIR creates it if missing).
ENV APP_HOME=/home/myproject
WORKDIR $APP_HOME

# Upgrade pip and install the project dependencies before copying the source,
# so these layers are rebuilt only when requirements.txt changes.
COPY requirements.txt ./
RUN /usr/local/bin/python -m pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy the build context (the current directory) into the application directory.
COPY . $APP_HOME

# restart_service.sh runs in the background and periodically kills
# `python main.py`; python is the foreground command of the `sh -c` wrapper,
# so the container's main process ends when python ends.
# NOTE(review): presumably a container restart policy then starts it again -
# confirm in the deployment configuration. Do not `exec` python here without
# revisiting restart_service.sh.
CMD ["sh", "-c", "$APP_HOME/restart_service.sh & python main.py"]

@ -0,0 +1,185 @@
# main.py
import pymysql
import time
from dotenv import load_dotenv
import os
import requests
import logging
import hashlib
# Load environment variables from .env file
load_dotenv()
# Send log records at INFO level and above to download.log (a path relative to
# the working directory), formatted as "timestamp [LEVEL] message".
logging.basicConfig(
filename='download.log',
level=logging.INFO,
format='%(asctime)s [%(levelname)s] %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
# Module-level logger used by the functions below.
logger = logging.getLogger(__name__)
def get_file_hash(filename, chunk_size=65536):
    """Return the SHA-256 hex digest of a file, or None if there is no such file.

    Args:
        filename: Path of the file to hash. A falsy value (None or "") is
            treated as "no file" instead of being passed to os.path.isfile,
            which raises TypeError for None.
        chunk_size: Number of bytes read per iteration; the file is streamed
            so it never has to fit in memory at once.

    Returns:
        The lowercase hexadecimal SHA-256 digest, or None when `filename` is
        falsy or does not refer to a regular file.
    """
    if not filename or not os.path.isfile(filename):
        return None

    digest = hashlib.sha256()
    with open(filename, "rb") as file:
        # read() returns b"" at end of file, which stops the iterator.
        for chunk in iter(lambda: file.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
def connect_to_mysql():
    """Open a MySQL connection configured from environment variables.

    Reads MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWORD and
    MYSQL_DATABASE. MYSQL_PORT falls back to 3306 when it is unset or empty;
    a non-numeric value is logged and treated as a connection failure instead
    of raising out of this function.

    Returns:
        An open pymysql connection that yields rows as dictionaries
        (DictCursor), or None if the connection could not be established.
    """
    try:
        port = int(os.getenv('MYSQL_PORT') or 3306)
    except ValueError as e:
        logger.error(f"Error: invalid MYSQL_PORT: {e}")
        return None

    try:
        connection = pymysql.connect(
            host=os.getenv('MYSQL_HOST'),
            port=port,
            user=os.getenv('MYSQL_USER'),
            password=os.getenv('MYSQL_PASSWORD'),
            database=os.getenv('MYSQL_DATABASE'),
            cursorclass=pymysql.cursors.DictCursor
        )
    except pymysql.Error as e:
        logger.error(f"Error: {e}")
        return None

    # Only hand back a connection that reports itself as open.
    if connection.open:
        return connection
    return None
def fetch_oldest_video(connection):
    """Fetch the tp_videos row with download_status = 0 and the earliest create_time.

    Args:
        connection: Database connection whose cursor() works as a context manager.

    Returns:
        The row returned by fetchone() (None when no row matches), or None if
        a pymysql error occurred; the error is logged.
    """
    pending_sql = "SELECT * FROM tp_videos WHERE download_status = 0 ORDER BY create_time LIMIT 1;"
    try:
        with connection.cursor() as cur:
            cur.execute(pending_sql)
            return cur.fetchone()
    except pymysql.Error as err:
        logger.error(f"Error fetching data: {err}")
    return None
def fetch_same_video(connection, video_hash):
    """Look up the tp_videos row with the highest id whose video_hash matches.

    Args:
        connection: Database connection whose cursor() works as a context manager.
        video_hash: Hash value bound to the query as a parameter.

    Returns:
        The row returned by fetchone() (None when nothing matches), or None if
        a pymysql error occurred; the error is logged.
    """
    lookup_sql = "SELECT * FROM tp_videos WHERE video_hash = %s ORDER BY id DESC LIMIT 1"
    try:
        with connection.cursor() as cur:
            # Run the query with the hash passed as a bound parameter.
            cur.execute(lookup_sql, (video_hash,))
            return cur.fetchone()
    except pymysql.Error as err:
        logger.error(f"Error fetching data: {err}")
    return None
def save_download_success(connection, id_, video_hash, is_same=False):
    """Record a finished download for one tp_videos row.

    Sets download_status=2 and stores the hash; video_status is set to 4 when
    `is_same` is true (the caller found another row with the same hash) and
    to 1 otherwise. If that UPDATE fails with a pymysql error, the pending
    transaction is rolled back and the row is marked download_status=3
    instead. No exception leaves this function: every failure, including a
    failure of the fallback UPDATE, is logged.

    Args:
        connection: Database connection providing cursor(), commit() and rollback().
        id_: Primary key of the row to update.
        video_hash: Hash of the downloaded file.
        is_same: True when the file duplicates an already known video.
    """
    video_status = 4 if is_same else 1
    try:
        with connection.cursor() as cursor:
            query = "UPDATE tp_videos SET download_status=2, video_hash=%s, video_status=%s WHERE id=%s;"
            cursor.execute(query, (video_hash, video_status, id_))
        connection.commit()
    except pymysql.Error as e:
        logger.error(f"更新失败: {e}")
        # The fallback runs in its own try block: an exception raised inside an
        # except handler is not caught by the sibling `except Exception` below.
        try:
            connection.rollback()
            with connection.cursor() as cursor:
                query = "UPDATE tp_videos SET download_status=3 WHERE id=%s;"
                cursor.execute(query, (id_,))
            connection.commit()
        except Exception as e1:
            logger.error(f"更新失败: {e1}")
    except Exception as e1:
        logger.error(f"更新失败: {e1}")
def save_download_error(connection, id_):
    """Mark one tp_videos row as failed by setting download_status=3.

    Args:
        connection: Database connection providing cursor() and commit().
        id_: Primary key of the row to update.

    A pymysql error is logged and swallowed; nothing is returned.
    """
    failed_sql = "UPDATE tp_videos SET download_status=3 WHERE id=%s;"
    row_key = (id_,)
    try:
        with connection.cursor() as cur:
            cur.execute(failed_sql, row_key)
            connection.commit()
    except pymysql.Error as err:
        logger.error(f"更新失败: {err}")
def download_video(video_data, timeout=(10, 60)):
    """Stream the file at video_data['ftp_path'] to video_data['video_path'].

    Every failure is reported by raising, so a caller can never mistake an
    incomplete row or a non-200 response for a finished download.

    Args:
        video_data: Mapping with 'video_path' (destination file), 'video_name'
            (only checked for presence here) and 'ftp_path' (URL to fetch).
        timeout: (connect, read) timeout in seconds passed to requests.get so
            an unresponsive server cannot block the caller forever.

    Raises:
        ValueError: A required field is missing or empty.
        requests.RequestException: The request failed or the response status
            was not 200.
        IOError: The destination file could not be written.
    """
    download_path = video_data.get('video_path', '')
    file_name = video_data.get('video_name', '')
    ftp_path = video_data.get('ftp_path', '')
    if not (download_path and file_name and ftp_path):
        raise ValueError("video_path, video_name and ftp_path are all required")

    try:
        # Send a streaming GET request; the context manager releases the
        # connection even when writing the file fails.
        with requests.get(ftp_path, stream=True, timeout=timeout) as response:
            status_code = response.status_code
            if status_code == 200:
                with open(download_path, 'wb') as file:
                    # Write the body to disk chunk by chunk.
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            file.write(chunk)
    except requests.RequestException as e:
        # Must stay ahead of the IOError clause: in the requests library,
        # RequestException is itself a subclass of IOError.
        logger.error(f"下载失败: {e}")
        raise requests.RequestException(f"下载失败: {e}") from e
    except IOError as e:
        logger.error(f"文件操作失败: {e}")
        raise IOError(f"文件操作失败: {e}") from e

    # Checked outside the try block so the error is logged exactly once.
    if status_code != 200:
        logger.error(f"下载失败:状态码:{status_code}")
        raise requests.RequestException(f"下载失败:状态码:{status_code}")
    logger.info(f"下载完成: {download_path}")
def _close_quietly(connection):
    """Close a database connection, logging instead of raising on failure."""
    try:
        connection.close()
    except Exception as e:
        logger.error(f"Error closing connection: {e}")


def _process_video(connection, video_data):
    """Download one queued row and record the outcome in the database."""
    logger.info(str(video_data))
    try:
        download_video(video_data)
    except Exception as e:
        logger.error(f"保存异常{e}")
        save_download_error(connection, video_data['id'])
        return

    download_path = video_data.get('video_path', '')
    video_hash = get_file_hash(download_path)
    if video_hash is None:
        # get_file_hash returns None when there is no file at download_path,
        # so nothing was actually downloaded: record a failure, not a success.
        logger.error(f"Downloaded file not found: {download_path}")
        save_download_error(connection, video_data['id'])
        return

    same_result = fetch_same_video(connection, video_hash)
    if same_result:
        # Another row already has this hash: delete the duplicate file but
        # still store the hash on this row.
        try:
            os.remove(download_path)
            logger.info(f"删除重复视频:{download_path}")
        except Exception as e:
            logger.error(f"删除重复视频失败,原因为:{e}")
        save_download_success(connection, video_data['id'], video_hash, is_same=True)
    else:
        save_download_success(connection, video_data['id'], video_hash, is_same=False)


def main():
    """Poll the database forever and download queued videos one at a time.

    Each iteration opens its own connection and always closes it before
    sleeping, so connections do not accumulate. When the database is
    unreachable the loop waits 10 seconds and retries; after processing a
    row it waits 2 seconds, and 5 seconds when nothing is queued.
    KeyboardInterrupt stops the loop cleanly.
    """
    try:
        while True:
            connection = connect_to_mysql()
            if not connection:
                time.sleep(10)
                continue
            try:
                video_data = fetch_oldest_video(connection)
                if video_data and video_data.get('id', None):
                    _process_video(connection, video_data)
                    delay = 2
                else:
                    print("No videos to download. Waiting...")
                    delay = 5
            finally:
                _close_quietly(connection)
            time.sleep(delay)
    except KeyboardInterrupt:
        logger.info("Stopping the downloader.")
# Start the download loop only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()

@ -0,0 +1,6 @@
[global]
# Install packages from the Tsinghua (TUNA) PyPI mirror.
index-url = https://pypi.tuna.tsinghua.edu.cn/simple
[install]
# trusted-host expects a bare host name (optionally host:port), not a URL.
trusted-host = pypi.tuna.tsinghua.edu.cn
[list]
format=columns

@ -0,0 +1,5 @@
pymysql
requests
python-dotenv
cryptography
tqdm

@ -0,0 +1,19 @@
#!/bin/bash
# Watchdog: every RESTART_INTERVAL seconds (default 1800) terminate the
# running "python main.py" process and append a timestamp to log.txt in the
# current working directory.
# NOTE(review): nothing in this script starts the process again; presumably
# the container (or a supervisor) restarts it - confirm in the deployment.
RESTART_INTERVAL="${RESTART_INTERVAL:-1800}"
TARGET_CMD="python main.py"

while true; do
    sleep "$RESTART_INTERVAL"
    # Find the Python process. -f matches against the full command line and
    # -x requires the whole line to match, so a wrapper shell whose arguments
    # merely contain "python main.py" (sh -c "... & python main.py") is skipped.
    pids=$(pgrep -fx "$TARGET_CMD")
    if [ -n "$pids" ]; then
        echo "Killing Python processes with PIDs: $pids"
        # Capture the current time once for this restart cycle.
        current_time=$(date "+%Y-%m-%d %H:%M:%S")
        # Kill the processes one by one.
        for pid in $pids; do
            kill "$pid"
        done
        # Append a single entry per restart cycle to log.txt.
        echo "Restart at $current_time" >> log.txt
    else
        echo "Python process not found."
    fi
done

@ -0,0 +1,192 @@
#!/bin/sh
# The MIT License (MIT)
#
# Copyright (c) 2017 Eficode Oy
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Version string printed by -v / --version.
VERSION="2.2.4"
# Append a "--" sentinel followed by the values these six variables hold at
# this point (before the script assigns its own, i.e. whatever was inherited)
# to the positional parameters. The option loop further down therefore always
# reaches a "--", and wait_for assigns the six saved values back to the
# variables just before it exec's the user's command.
set -- "$@" -- "$TIMEOUT" "$QUIET" "$PROTOCOL" "$HOST" "$PORT" "$result"
# Default timeout in seconds; 0 disables the timeout.
TIMEOUT=15
# 1 suppresses the messages printed through echoerr.
QUIET=0
# The protocol to make the request with, either "tcp" or "http"
PROTOCOL="tcp"
echoerr() {
  # Write all arguments as a single line to stderr, unless QUIET is 1.
  [ "$QUIET" -ne 1 ] || return 0
  printf "%s\n" "$*" >&2
}
usage() {
  # Print the help text to stderr, then terminate the script with the exit
  # status passed as the first argument.
  cat >&2 <<USAGE
Usage:
$0 host:port|url [-t timeout] [-- command args]
-q | --quiet Do not output any status messages
-t TIMEOUT | --timeout=timeout Timeout in seconds, zero for no timeout
Defaults to 15 seconds
-v | --version Show the version of this tool
-- COMMAND ARGS Execute command with args after the test finishes
USAGE
  exit "$1"
}
# Poll HOST (and PORT for tcp) once per second until it answers or TIMEOUT
# seconds have passed. On success, exec the command found in the positional
# parameters if there is one, otherwise exit 0. On timeout, exit 1.
# Reads the globals PROTOCOL, HOST, PORT, TIMEOUT and QUIET (via echoerr).
wait_for() {
# Make sure the probing tool required by the chosen protocol is installed.
case "$PROTOCOL" in
tcp)
if ! command -v nc >/dev/null; then
echoerr 'nc command is missing!'
exit 1
fi
;;
http)
if ! command -v wget >/dev/null; then
echoerr 'wget command is missing!'
exit 1
fi
;;
esac
# Absolute deadline in seconds since the epoch.
TIMEOUT_END=$(($(date +%s) + TIMEOUT))
while :; do
# One probe attempt; its exit status is captured in $result below.
case "$PROTOCOL" in
tcp)
nc -w 1 -z "$HOST" "$PORT" > /dev/null 2>&1
;;
http)
wget --timeout=1 --tries=1 -q "$HOST" -O /dev/null > /dev/null 2>&1
;;
*)
echoerr "Unknown protocol '$PROTOCOL'"
exit 1
;;
esac
result=$?
if [ $result -eq 0 ] ; then
# More than 7 parameters means a user command precedes the "--" sentinel and
# the six saved values that were appended at the top of the script.
if [ $# -gt 7 ] ; then
# Rotate the command words one at a time from the front to the back, leaving
# the sentinel and the six saved values in positions 1-7.
for result in $(seq $(($# - 7))); do
result=$1
shift
set -- "$@" "$result"
done
# Put the saved values back into the variables, drop the sentinel and the
# saved values, and replace this shell with the user's command.
TIMEOUT=$2 QUIET=$3 PROTOCOL=$4 HOST=$5 PORT=$6 result=$7
shift 7
exec "$@"
fi
exit 0
fi
# Give up once the deadline has passed; TIMEOUT=0 means wait indefinitely.
if [ $TIMEOUT -ne 0 -a $(date +%s) -ge $TIMEOUT_END ]; then
echo "Operation timed out" >&2
exit 1
fi
sleep 1
done
}
# Parse the command line. The loop ends at the first "--": either one given
# by the user or the sentinel appended at the top of the script. The order of
# the patterns matters because earlier patterns shadow later ones.
while :; do
case "$1" in
# A full URL selects the http protocol; the URL itself is kept in HOST.
http://*|https://*)
HOST="$1"
PROTOCOL="http"
shift 1
;;
# host:port - split on ":" into HOST (field 1) and PORT (field 2).
*:* )
HOST=$(printf "%s\n" "$1"| cut -d : -f 1)
PORT=$(printf "%s\n" "$1"| cut -d : -f 2)
shift 1
;;
-v | --version)
echo $VERSION
exit
;;
-q | --quiet)
QUIET=1
shift 1
;;
# "-q-..." is rejected; QUIET is reset so the error message is printed.
-q-*)
QUIET=0
echoerr "Unknown option: $1"
usage 1
;;
# "-q" fused with further short options (e.g. -qt5): enable quiet mode and
# push the remainder back as its own "-..." argument for the next iteration.
-q*)
QUIET=1
result=$1
shift 1
set -- -"${result#-q}" "$@"
;;
# Timeout given as a separate argument.
-t | --timeout)
TIMEOUT="$2"
shift 2
;;
# Timeout fused to the flag, e.g. -t30.
-t*)
TIMEOUT="${1#-t}"
shift 1
;;
--timeout=*)
TIMEOUT="${1#*=}"
shift 1
;;
# End of options; what remains is passed on to wait_for.
--)
shift
break
;;
--help)
usage 0
;;
# Any other option or bare word is an error; QUIET is reset so it is reported.
-*)
QUIET=0
echoerr "Unknown option: $1"
usage 1
;;
*)
QUIET=0
echoerr "Unknown argument: $1"
usage 1
;;
esac
done
# The timeout must be a non-negative integer. A non-numeric value makes the
# test itself fail (its diagnostic is discarded) and is rejected the same way.
if ! [ "$TIMEOUT" -ge 0 ] 2>/dev/null; then
  echoerr "Error: invalid timeout '$TIMEOUT'"
  usage 3
fi

# Check that the target required by the selected protocol was supplied.
case "$PROTOCOL" in
  http)
    if [ -z "$HOST" ]; then
      echoerr "Error: you need to provide a host to test."
      usage 2
    fi
    ;;
  tcp)
    if [ -z "$HOST" ] || [ -z "$PORT" ]; then
      echoerr "Error: you need to provide a host and port to test."
      usage 2
    fi
    ;;
esac

# Start polling, passing along whatever positional parameters remain.
wait_for "$@"
Loading…
Cancel
Save