first commit
parent
d15e5b0546
commit
0f58c596c7
@ -0,0 +1,28 @@
|
||||
# Base image: official CPython 3.7 image.
FROM python:3.7

# Use Asia/Shanghai as the container's local time zone.
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

# Install OS packages. `apt-get update` and `apt-get install` share one layer
# so a cached, stale package index is never reused, and the index is deleted in
# the same layer so it does not end up in the image.
# NOTE(review): the original comment here said "install netcat", but netcat was
# never installed; add it to this list if the bundled wait-for script is used.
RUN apt-get update \
    && apt-get install -y --no-install-recommends vim \
    && rm -rf /var/lib/apt/lists/*

# Optional: point pip at a mirror inside mainland China.
COPY pip.conf /root/.pip/pip.conf

# Application directory inside the container (WORKDIR creates it if missing).
ENV APP_HOME=/home/myproject
WORKDIR $APP_HOME

# Upgrade pip itself.
RUN /usr/local/bin/python -m pip install --no-cache-dir --upgrade pip

# Install the project dependencies before copying the source tree, so this
# layer is rebuilt only when requirements.txt changes.
COPY requirements.txt $APP_HOME/
RUN pip install --no-cache-dir -r requirements.txt

# Copy the build context (. is the current directory) into the work directory.
COPY . $APP_HOME

# Start the periodic restart watchdog in the background and the downloader in
# the foreground; `sh -c` is needed so that $APP_HOME is expanded at run time.
CMD ["sh", "-c", "$APP_HOME/restart_service.sh & python main.py"]
|
@ -0,0 +1,185 @@
|
||||
# main.py
|
||||
|
||||
import pymysql
|
||||
import time
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
import requests
|
||||
import logging
|
||||
import hashlib
|
||||
|
||||
# Read key=value pairs from a .env file into the process environment.
load_dotenv()

# Append INFO-and-above records to download.log, each prefixed with a
# "YYYY-MM-DD HH:MM:SS [LEVEL]" header.
logging.basicConfig(
    level=logging.INFO,
    filename='download.log',
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s [%(levelname)s] %(message)s',
)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_file_hash(filename):
    """Return the SHA-256 hex digest of a file's contents.

    Args:
        filename: Path of the file to hash.

    Returns:
        The hexadecimal SHA-256 digest as a string, or None when
        ``filename`` does not refer to an existing regular file.
    """
    if not os.path.isfile(filename):
        return None

    digest = hashlib.sha256()
    with open(filename, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF),
        # so the file is hashed in 1 KiB pieces rather than loaded at once.
        for block in iter(lambda: stream.read(1024), b""):
            digest.update(block)

    return digest.hexdigest()
|
||||
|
||||
|
||||
def connect_to_mysql():
    """Open a MySQL connection configured from environment variables.

    Reads MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWORD and
    MYSQL_DATABASE. Rows are returned as dictionaries (DictCursor).

    Returns:
        An open pymysql connection, or None when the port is not a valid
        integer, the connection attempt fails, or the connection is not open.
    """
    try:
        # An unset or empty MYSQL_PORT falls back to MySQL's default port
        # instead of raising TypeError from int(None).
        port = int(os.getenv('MYSQL_PORT') or 3306)
    except ValueError as e:
        logger.error(f"Error: invalid MYSQL_PORT: {e}")
        return None

    try:
        connection = pymysql.connect(
            host=os.getenv('MYSQL_HOST'),
            port=port,
            user=os.getenv('MYSQL_USER'),
            password=os.getenv('MYSQL_PASSWORD'),
            database=os.getenv('MYSQL_DATABASE'),
            cursorclass=pymysql.cursors.DictCursor
        )
        if connection.open:
            return connection
        # Not usable: release whatever was allocated before giving up.
        connection.close()
    except pymysql.Error as e:
        logger.error(f"Error: {e}")
    return None
|
||||
|
||||
def fetch_oldest_video(connection):
    """Fetch the earliest-created row of tp_videos whose download_status is 0.

    Args:
        connection: Database connection providing ``cursor()``.

    Returns:
        The row returned by ``fetchone()`` (None when no row matches), or
        None when a pymysql error occurs; the error is logged.
    """
    sql = "SELECT * FROM tp_videos WHERE download_status = 0 ORDER BY create_time LIMIT 1;"
    try:
        with connection.cursor() as cur:
            cur.execute(sql)
            return cur.fetchone()
    except pymysql.Error as exc:
        logger.error(f"Error fetching data: {exc}")
    return None
|
||||
|
||||
def fetch_same_video(connection, video_hash):
    """Look up the newest tp_videos row that has the given video_hash.

    Args:
        connection: Database connection providing ``cursor()``.
        video_hash: Hash value to match against the video_hash column.

    Returns:
        The row with the highest id among the matches (None when nothing
        matches), or None when a pymysql error occurs; the error is logged.
    """
    lookup = "SELECT * FROM tp_videos WHERE video_hash = %s ORDER BY id DESC LIMIT 1"
    try:
        with connection.cursor() as cur:
            # Run the parameterized query.
            cur.execute(lookup, (video_hash,))
            return cur.fetchone()
    except pymysql.Error as exc:
        logger.error(f"Error fetching data: {exc}")
    return None
|
||||
|
||||
def save_download_success(connection, id_, video_hash, is_same=False):
    """Record a finished download on a tp_videos row.

    Sets download_status=2 and stores ``video_hash``; video_status is set to
    4 when ``is_same`` is true and to 1 otherwise. If that update fails with
    a pymysql error, the transaction is rolled back and the row is marked
    download_status=3 instead. All errors are logged, never raised.

    Args:
        connection: Database connection providing ``cursor()``, ``commit()``
            and ``rollback()``.
        id_: Primary key of the row to update.
        video_hash: Hash of the downloaded file.
        is_same: Whether a row with the same hash already existed.
    """
    video_status = 4 if is_same else 1
    try:
        with connection.cursor() as cursor:
            query = "UPDATE tp_videos SET download_status=2, video_hash=%s, video_status=%s WHERE id=%s;"
            cursor.execute(query, (video_hash, video_status, id_))
            connection.commit()
    except pymysql.Error as e:
        logger.error(f"更新失败: {e}")
        # The fallback can fail too (e.g. the connection is gone); guard it so
        # a second error is logged instead of escaping from this handler.
        try:
            connection.rollback()
            with connection.cursor() as cursor:
                query = "UPDATE tp_videos SET download_status=3 WHERE id=%s;"
                cursor.execute(query, (id_,))
                connection.commit()
        except Exception as e1:
            logger.error(f"更新失败: {e1}")
    except Exception as e1:
        logger.error(f"更新失败: {e1}")
|
||||
|
||||
def save_download_error(connection, id_):
    """Mark a tp_videos row as failed by setting download_status=3.

    Args:
        connection: Database connection providing ``cursor()`` and ``commit()``.
        id_: Primary key of the row to update.

    A pymysql error during the update is logged and not re-raised.
    """
    sql = "UPDATE tp_videos SET download_status=3 WHERE id=%s;"
    params = (id_, )
    try:
        with connection.cursor() as cur:
            cur.execute(sql, params)
            connection.commit()
    except pymysql.Error as exc:
        logger.error(f"更新失败: {exc}")
|
||||
|
||||
|
||||
def download_video(video_data, timeout=(10, 60)):
    """Stream the file at ``video_data['ftp_path']`` to ``video_data['video_path']``.

    Every failure is reported by raising, so the caller never mistakes a
    failed or skipped download for a successful one.

    Args:
        video_data: Mapping with non-empty 'video_path' (destination file),
            'video_name' and 'ftp_path' (URL to fetch) entries.
        timeout: (connect, read) timeout in seconds passed to requests, so a
            stalled server cannot block the downloader forever.

    Raises:
        ValueError: A required entry is missing or empty.
        requests.RequestException: The request failed or the response status
            was not 200.
        IOError: Writing the destination file failed.
    """
    download_path = video_data.get('video_path', '')
    file_name = video_data.get('video_name', '')
    ftp_path = video_data.get('ftp_path', '')

    if not (download_path and file_name and ftp_path):
        raise ValueError("video record is missing video_path, video_name or ftp_path")

    try:
        # Streamed GET; the context manager releases the connection on exit.
        with requests.get(ftp_path, stream=True, timeout=timeout) as response:
            # Anything other than 200 is a failed download.
            if response.status_code != 200:
                raise requests.RequestException(f"状态码:{response.status_code}")

            with open(download_path, 'wb') as file:
                # Write the body chunk by chunk; empty chunks are skipped.
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)

        logger.info(f"下载完成: {download_path}")
    except requests.RequestException as e:
        logger.error(f"下载失败: {e}")
        raise requests.RequestException(f"下载失败: {e}") from e
    except IOError as e:
        logger.error(f"文件操作失败: {e}")
        raise IOError(f"文件操作失败: {e}") from e
|
||||
|
||||
def _store_download_result(connection, video_data):
    """Hash the downloaded file and record the outcome on its tp_videos row."""
    download_path = video_data.get('video_path', '')
    video_hash = get_file_hash(download_path)
    if video_hash is None:
        # get_file_hash returns None when the file does not exist, so the
        # download cannot be treated as successful.
        logger.error(f"Downloaded file not found: {download_path}")
        save_download_error(connection, video_data['id'])
        return

    same_result = fetch_same_video(connection, video_hash)
    if same_result:
        # A row with this hash already exists: drop the duplicate file.
        try:
            os.remove(download_path)
            logger.info(f"删除重复视频:{download_path}")
        except Exception as e:
            logger.error(f"删除重复视频失败,原因为:{e}")

    save_download_success(connection, video_data['id'], video_hash, is_same=bool(same_result))


def main():
    """Run the download loop until interrupted.

    Each iteration opens a database connection, takes the oldest pending
    video, downloads it, records the result, and closes the connection
    before sleeping (2 s after a download attempt, 5 s when nothing is
    pending, 10 s when the database is unreachable).
    """
    try:
        while True:
            connection = connect_to_mysql()
            if not connection:
                time.sleep(10)
                continue

            try:
                video_data = fetch_oldest_video(connection)
                if video_data and video_data.get('id', None):
                    logger.info(str(video_data))
                    try:
                        download_video(video_data)
                    except Exception as e:
                        logger.error(f"保存异常{e}")
                        save_download_error(connection, video_data['id'])
                    else:
                        _store_download_result(connection, video_data)
                    delay = 2
                else:
                    print("No videos to download. Waiting...")
                    delay = 5
            finally:
                # One connection is opened per iteration, so it is closed per
                # iteration too; otherwise every pass leaks a connection.
                try:
                    connection.close()
                except Exception as e:
                    logger.error(f"Error closing connection: {e}")

            time.sleep(delay)
    except KeyboardInterrupt:
        logger.info("Stopping the downloader.")


if __name__ == "__main__":
    main()
|
@ -0,0 +1,6 @@
|
||||
[global]
# Package index used for all pip commands (Tsinghua University mirror).
index-url = https://pypi.tuna.tsinghua.edu.cn/simple

[install]
# trusted-host takes a bare host name (or host:port), not a URL with a scheme.
trusted-host = pypi.tuna.tsinghua.edu.cn

[list]
format=columns
|
@ -0,0 +1,5 @@
|
||||
pymysql
|
||||
requests
|
||||
python-dotenv
|
||||
cryptography
|
||||
tqdm
|
@ -0,0 +1,19 @@
|
||||
#!/bin/bash

# Watchdog loop: every 1800 seconds, send the default kill signal to every
# process whose command line matches "python main.py" and append one
# timestamped line per killed PID to log.txt in the current directory.
while :; do
    sleep 1800

    targets=$(pgrep -f "python main.py")
    if [ -z "$targets" ]; then
        echo "Python process not found."
        continue
    fi

    echo "Killing Python processes with PIDs: $targets"

    # Timestamp taken once, shared by every log line of this round.
    stamp=$(date "+%Y-%m-%d %H:%M:%S")

    # Kill the matched processes one at a time.
    for target in $targets; do
        kill "$target"
        echo "Restart at $stamp" >> log.txt  # append the restart record to log.txt
    done
done
|
@ -0,0 +1,192 @@
|
||||
#!/bin/sh
|
||||
|
||||
# The MIT License (MIT)
|
||||
#
|
||||
# Copyright (c) 2017 Eficode Oy
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
VERSION='2.2.4'

# Snapshot the caller's values of the variables this script is about to
# overwrite by appending them to the positional parameters after a "--"
# marker. This must run BEFORE the defaults below are assigned; wait_for
# reads the snapshot back right before it execs the user's command.
set -- "$@" -- "$TIMEOUT" "$QUIET" "$PROTOCOL" "$HOST" "$PORT" "$result"

# Defaults: 15 second timeout, status messages enabled.
TIMEOUT=15
QUIET=0

# The protocol to make the request with, either "tcp" or "http"
PROTOCOL='tcp'
|
||||
|
||||
# Print all arguments as one line on stderr unless quiet mode (QUIET=1) is on.
echoerr() {
  if [ "$QUIET" -ne 1 ]; then
    printf "%s\n" "$*" >&2
  fi
}
|
||||
|
||||
# Print the help text on stderr, then exit with the status given as $1.
usage() {
  cat >&2 << USAGE
Usage:
$0 host:port|url [-t timeout] [-- command args]
-q | --quiet Do not output any status messages
-t TIMEOUT | --timeout=timeout Timeout in seconds, zero for no timeout
Defaults to 15 seconds
-v | --version Show the version of this tool
-- COMMAND ARGS Execute command with args after the test finishes
USAGE
  exit "$1"
}
|
||||
|
||||
# Poll the target once per second until it answers or the timeout expires.
# On success, either exec the command passed after "--" (with the caller's
# original variable values restored) or exit 0. On timeout, exit 1.
wait_for() {
  # Fail fast when the probe tool needed for the selected protocol is absent.
  if [ "$PROTOCOL" = "tcp" ]; then
    if ! command -v nc >/dev/null; then
      echoerr 'nc command is missing!'
      exit 1
    fi
  elif [ "$PROTOCOL" = "http" ]; then
    if ! command -v wget >/dev/null; then
      echoerr 'wget command is missing!'
      exit 1
    fi
  fi

  # Absolute deadline, in seconds since the epoch.
  TIMEOUT_END=$(($(date +%s) + TIMEOUT))

  while true; do
    # One probe attempt; its exit status is captured right after the case.
    case "$PROTOCOL" in
      tcp)
        nc -w 1 -z "$HOST" "$PORT" > /dev/null 2>&1
        ;;
      http)
        wget --timeout=1 --tries=1 -q "$HOST" -O /dev/null > /dev/null 2>&1
        ;;
      *)
        echoerr "Unknown protocol '$PROTOCOL'"
        exit 1
        ;;
    esac

    result=$?

    if [ $result -eq 0 ] ; then
      # More than 7 parameters means a command precedes the "--" marker and
      # the 6 saved values that were appended at script start.
      if [ $# -gt 7 ] ; then
        # Rotate the command words to the end so that the marker and the
        # saved values occupy positions 1 through 7.
        for result in $(seq $(($# - 7))); do
          result=$1
          shift
          set -- "$@" "$result"
        done

        # Restore the caller's original values, drop them, run the command.
        TIMEOUT=$2 QUIET=$3 PROTOCOL=$4 HOST=$5 PORT=$6 result=$7
        shift 7
        exec "$@"
      fi
      exit 0
    fi

    # A TIMEOUT of 0 disables the deadline entirely.
    if [ "$TIMEOUT" -ne 0 ] && [ "$(date +%s)" -ge "$TIMEOUT_END" ]; then
      echo "Operation timed out" >&2
      exit 1
    fi

    sleep 1
  done
}
|
||||
|
||||
# Consume command-line arguments until "--" is reached. The order of the
# patterns matters: the more specific ones must come before the catch-alls.
while true; do
  case "$1" in
    http://*|https://*)
      # A URL selects the http probe; the whole URL is kept in HOST.
      HOST="$1"
      PROTOCOL="http"
      shift
      ;;
    *:* )
      # host:port pair for the tcp probe.
      HOST=$(printf "%s\n" "$1" | cut -d : -f 1)
      PORT=$(printf "%s\n" "$1" | cut -d : -f 2)
      shift
      ;;
    -v | --version)
      echo "$VERSION"
      exit
      ;;
    -q | --quiet)
      QUIET=1
      shift
      ;;
    -q-*)
      QUIET=0
      echoerr "Unknown option: $1"
      usage 1
      ;;
    -q*)
      # Bundled short options such as "-qt5": turn quiet on, then push the
      # remaining letters back as their own option ("-t5").
      QUIET=1
      result=$1
      shift
      set -- -"${result#-q}" "$@"
      ;;
    -t | --timeout)
      TIMEOUT="$2"
      shift 2
      ;;
    -t*)
      TIMEOUT="${1#-t}"
      shift
      ;;
    --timeout=*)
      TIMEOUT="${1#*=}"
      shift
      ;;
    --)
      # End of options; whatever follows is left in "$@" for wait_for.
      shift
      break
      ;;
    --help)
      usage 0
      ;;
    -*)
      QUIET=0
      echoerr "Unknown option: $1"
      usage 1
      ;;
    *)
      QUIET=0
      echoerr "Unknown argument: $1"
      usage 1
      ;;
  esac
done
|
||||
|
||||
# The timeout must be a non-negative integer; the test's own error output
# for non-numeric values is discarded.
if ! [ "$TIMEOUT" -ge 0 ] 2>/dev/null; then
  echoerr "Error: invalid timeout '$TIMEOUT'"
  usage 3
fi

# Check that the target required by the selected protocol was supplied.
if [ "$PROTOCOL" = "tcp" ]; then
  if [ -z "$HOST" ] || [ -z "$PORT" ]; then
    echoerr "Error: you need to provide a host and port to test."
    usage 2
  fi
elif [ "$PROTOCOL" = "http" ]; then
  if [ -z "$HOST" ]; then
    echoerr "Error: you need to provide a host to test."
    usage 2
  fi
fi

# Hand the remaining positional parameters to the polling loop.
wait_for "$@"
|
Loading…
Reference in New Issue