diff --git a/deployment/docker/Dockerfile b/deployment/docker/Dockerfile index a16f369..c41a2a8 100644 --- a/deployment/docker/Dockerfile +++ b/deployment/docker/Dockerfile @@ -2,9 +2,11 @@ ARG MUSIC_FOLDER_ARG=/nas/music ARG MUSIC_URL_ARG=https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C +ARG MUSIC_OP_ARG=download ENV MUSIC_URL=${MUSIC_URL_ARG} ENV MUSIC_FOLDER=${MUSIC_FOLDER_ARG} +ENV MUSIC_OP=${MUSIC_OP_ARG} COPY requirements.txt / RUN pip install -r requirements.txt diff --git a/deployment/docker/start.sh b/deployment/docker/start.sh index a7a92cb..e64c689 100644 --- a/deployment/docker/start.sh +++ b/deployment/docker/start.sh @@ -1,10 +1,19 @@ -#!/bin/sh -while true -do - echo "" - echo "" - echo "Fetching Musics happens at: $(date +"%Y/%m/%d %I:%M:%S")" - echo "#########################################" - python3 /main.py - sleep 1d -done \ No newline at end of file +#!/bin/bash +# while true +# do +# echo "" +# echo "" +# echo "Fetching Musics happens at: $(date +"%Y/%m/%d %I:%M:%S")" +# echo "#########################################" +if [[ "${MUSIC_OP}" == "download" ]]; then + echo "Start downloading the music" + python3 /download.py +elif [[ "${MUSIC_OP}" == "patch" ]]; then + echo "Start patching the music" + python3 /patch.py +else + echo "Invalid MUSIC_OP: \"${MUSIC_OP}\"" +fi + +# sleep 1d +# done \ No newline at end of file diff --git a/src/download.py b/src/download.py new file mode 100644 index 0000000..bc62916 --- /dev/null +++ b/src/download.py @@ -0,0 +1,123 @@ +import requests +from bs4 import BeautifulSoup +import re +from urllib.parse import unquote +import os +import time +import glob +from dataclasses import dataclass +import json +from io import StringIO +from contextlib import redirect_stdout +from patch import MusicPatcher +import string + +mp3_url_template = "https://antiserver.kuwo.cn/anti.s?type=convert_url&rid={}&format=mp3&response=url" +music_name_template = "{name}_{artist}_{album}_{id}.mp3" + 
# Folder the MP3 files are written to; overridable through MUSIC_FOLDER.
destination_folder = os.getenv('MUSIC_FOLDER', "/nas/music")
# Chart page that is scraped for tracks; overridable through MUSIC_URL.
root_url = os.getenv("MUSIC_URL", "https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C")

# One or more consecutive "xHH" groups (an 'x' followed by two hex digits).
_ESCAPED_BYTES_RUN = re.compile(r"(?:x[0-9a-fA-F]{2})+")


@dataclass
class Music:
    """One track scraped from the chart page, plus its download state.

    The first five fields keep the positional order of the original
    constructor (id, artist, album, name, pay). Declaring them as real
    dataclass fields makes the generated ``__eq__``/``__repr__`` reflect the
    track data instead of treating every instance as equal.
    """

    id: str
    artist: str
    album: str
    name: str
    pay: str
    url: str = ""
    targeting_file: str = ""
    status: str = "pending"
    file_name: str = ""

    def exists(self):
        """Return True when ``targeting_file`` already exists on disk."""
        return os.path.exists(self.targeting_file)

    def download(self):
        """Fetch ``url`` and store the body at ``targeting_file``.

        Returns the resulting status: "success" for an HTTP 200 response that
        was written to disk, "fail" for any other status code. Network and
        file-system errors propagate to the caller.
        """
        print("Checking url " + self.url)
        # NOTE(review): verify=False disables TLS certificate validation;
        # kept from the original, confirm that it is really required.
        with requests.get(self.url, verify=False, timeout=5) as response:
            if response.status_code != 200:
                self.status = "fail"
                return self.status
            payload = response.content

        # Write to a temporary name first so an interrupted write never
        # leaves a truncated file that exists() would accept on a later run.
        partial_file = self.targeting_file + ".part"
        try:
            with open(partial_file, 'wb') as f:
                f.write(payload)
            os.replace(partial_file, self.targeting_file)
        except OSError:
            if os.path.exists(partial_file):
                os.remove(partial_file)
            raise
        self.status = "success"
        return self.status


def _decode_escaped_run(match):
    """Decode one run of "xHH" groups as UTF-8, or keep it when invalid."""
    run = match.group(0)
    raw = bytes(int(run[i + 1:i + 3], 16) for i in range(0, len(run), 3))
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        # Not real escaped bytes (e.g. the "xce" in "Exceed"): keep the text.
        return run


def convertToUtf8(str):
    """Turn "xHH" byte escapes (backslashes already stripped) back into text.

    Every 'x' followed by two hex digits is treated as one byte; consecutive
    bytes are decoded as UTF-8. Runs that do not form valid UTF-8 are left
    untouched. Newlines are removed from the result, as the original did.

    The original built a bytes literal from this scraped text and ran it
    through ``exec``; the same conversion is done here without executing
    anything.
    """
    text = str  # the parameter name shadows the builtin; kept for callers
    return _ESCAPED_BYTES_RUN.sub(_decode_escaped_run, text).replace("\n", "")


def getMp3Url(url):
    """Ask the resolver at ``url`` for the MP3 location.

    Returns the response body decoded as UTF-8, or "" when the request
    fails, the status is not 200, or the body is not valid UTF-8.
    """
    try:
        with requests.get(url, verify=False, timeout=10) as res:
            if res.status_code == 200:
                return res.content.decode('utf-8').strip()
    except (requests.RequestException, UnicodeDecodeError):
        pass
    print("error to get mp3 url")
    return ""


def getMusicList(url, retry):
    """Download the chart page at ``url`` and return the raw response body.

    ``retry`` is the number of attempts already used. After a failure the
    request is repeated with a one second pause until ``retry`` exceeds 3;
    then "" is returned. The original ignored ``url`` and dropped the result
    of its recursive retry, so a retried call returned None.
    """
    while True:
        try:
            with requests.get(url, verify=False, timeout=20) as res:
                return res.content
        except requests.RequestException:
            if retry > 3:
                return ""
            retry += 1
            time.sleep(1)
            print("unable to access the kuwo.cn. Retries: {}".format(retry))


def _safe_component(text):
    """Replace characters that would alter the target path of a file name."""
    for forbidden in ("/", "\\", "\0"):
        text = text.replace(forbidden, "-")
    return text


def _parse_music(item):
    """Build a Music from one scraped element, or None when it is unusable."""
    raw_payload = item.get("data-music")
    if not raw_payload:
        return None
    json_payload = raw_payload.replace("'", "").replace("\\", "")
    if not json_payload.endswith("}"):
        # Kept from the original: close a payload that arrives cut off.
        json_payload += '", "pay":""}'
    try:
        music_data = json.loads(json_payload)
    except ValueError:
        print("[Warn] Skipping unparsable music entry: {}".format(json_payload))
        return None
    if not isinstance(music_data, dict) or "id" not in music_data:
        return None
    return Music(
        music_data["id"],
        convertToUtf8(music_data.get("artist", "")),
        convertToUtf8(music_data.get("album", "")),
        convertToUtf8(music_data.get("name", "")),
        music_data.get("pay", ""),
    )


def main():
    """Scrape the chart page, download tracks not on disk, then patch them."""
    content = getMusicList(root_url, 0)
    if not content:
        print("Unable to access the music list. Exit")
        return

    # NOTE(review): str() on the bytes body yields its "b'...'" repr, which is
    # presumably why quotes/backslashes are stripped and "xHH" escapes are
    # decoded afterwards. Decoding the body as UTF-8 looks cleaner but would
    # change the generated file names - confirm before switching.
    body = BeautifulSoup(str(content), "html.parser")
    new_music_count = 0
    for item in body.find_all("div", {"class": "tools"}):
        music = _parse_music(item)
        if music is None:
            continue
        music.file_name = music_name_template.format(
            name=_safe_component(music.name),
            artist=_safe_component(music.artist),
            album=_safe_component(music.album),
            id=_safe_component(music.id.replace("MUSIC_", "")),
        )
        music.targeting_file = os.path.join(destination_folder, music.file_name)
        if music.exists():
            print("[Info] Skipping the music {}".format(music.targeting_file))
            continue
        new_music_count += 1
        music.url = getMp3Url(mp3_url_template.format(music.id))
        if music.url == "":
            continue
        try:
            music.download()
        except Exception as err:
            # Best effort per track: report the cause and keep going.
            music.status = "fail"
            print("error: {}".format(err))
        print("[Info] Download music \"{}\" with the status: {}".format(music.file_name, music.status))
    print("[Summary] Found {} new musics".format(new_music_count))

    print("Processing the music")
    MusicPatcher().process_music()


if __name__ == "__main__":
    main()