diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2bf92c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +bin +include +lib +lib64 +pyvenv.cfg +share +**/@eaDir +**/__pycache__ \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2bf92c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +bin +include +lib +lib64 +pyvenv.cfg +share +**/@eaDir +**/__pycache__ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a16f369 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +from python:slim + +ARG MUSIC_FOLDER_ARG=/nas/music +ARG MUSIC_URL_ARG=https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C + +ENV MUSIC_URL=${MUSIC_URL_ARG} +ENV MUSIC_FOLDER=${MUSIC_FOLDER_ARG} + +COPY requirements.txt / +RUN pip install -r requirements.txt +COPY src/* / +COPY start.sh / +RUN chmod +x /start.sh +ENTRYPOINT ["/start.sh"] + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2bf92c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +bin +include +lib +lib64 +pyvenv.cfg +share +**/@eaDir +**/__pycache__ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a16f369 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +from python:slim + +ARG MUSIC_FOLDER_ARG=/nas/music +ARG MUSIC_URL_ARG=https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C + +ENV MUSIC_URL=${MUSIC_URL_ARG} +ENV MUSIC_FOLDER=${MUSIC_FOLDER_ARG} + +COPY requirements.txt / +RUN pip install -r requirements.txt +COPY src/* / +COPY start.sh / +RUN chmod +x /start.sh +ENTRYPOINT ["/start.sh"] + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..36ff8cf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +beautifulsoup4==4.9.3 +bs4==0.0.1 +certifi==2021.5.30 +chardet==4.0.0 +idna==2.10 +psycopg2-binary==2.9.1 +requests==2.25.1 +soupsieve==2.2.1 +urllib3==1.26.5 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2bf92c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +bin +include +lib +lib64 +pyvenv.cfg +share +**/@eaDir +**/__pycache__ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a16f369 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +from python:slim + +ARG MUSIC_FOLDER_ARG=/nas/music +ARG MUSIC_URL_ARG=https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C + +ENV MUSIC_URL=${MUSIC_URL_ARG} +ENV MUSIC_FOLDER=${MUSIC_FOLDER_ARG} + +COPY requirements.txt / +RUN pip install -r requirements.txt +COPY src/* / +COPY start.sh / +RUN chmod +x /start.sh +ENTRYPOINT ["/start.sh"] + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..36ff8cf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +beautifulsoup4==4.9.3 +bs4==0.0.1 +certifi==2021.5.30 +chardet==4.0.0 +idna==2.10 +psycopg2-binary==2.9.1 +requests==2.25.1 +soupsieve==2.2.1 +urllib3==1.26.5 diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..c4dfd14 --- /dev/null +++ b/src/main.py @@ -0,0 +1,103 @@ +import requests +from bs4 import BeautifulSoup +import re +from urllib.parse import unquote +import os +import time +import glob +from dataclasses import dataclass +import json +from io import StringIO +from contextlib import redirect_stdout +from patch import MusicPatcher +import string + +mp3_url_template = "https://antiserver.kuwo.cn/anti.s?type=convert_url&rid={}&format=mp3&response=url" +music_name_template = "{name}_{artist}_{album}_{id}.mp3" + +destination_folder = os.getenv('MUSIC_FOLDER', "/nas/music") +root_url = os.getenv("MUSIC_URL", "https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C") +@dataclass +class Music: + def __init__(self, id: str, artist: str, album: str, name: str, pay:str): + self.artist = artist + self.album = album + self.id = id + self.name = name + self.pay = pay + self.url = "" + self.targeting_file = "" + self.status ="pending" + self.file_name = "" + + def exists(self): + return os.path.exists(self.targeting_file) + + def download(self): + print("Checking url " + self.url) + r = requests.get(self.url, verify=False, timeout=5) + if r.status_code == 200: + with open(self.targeting_file, 'wb') as f: + f.write(r.content) + self.status = "success" + else: + self.status = "fail" + r.close() + return self.status + +def convertToUtf8(str): + f = StringIO() + res = "" + for i, ch in enumerate(str): + if ch =='x' and len(str) > i+2 and all(c in string.hexdigits for c in str[i+1: i+3]): + res += "\\x" + else: + res += ch + with redirect_stdout(f): + exec('print(b\''+ res + '\'.decode(\'utf-8\'))') + # print(f.getvalue()) + res = f.getvalue().replace("\n","") + return res + +def getMp3Url(url): + res = requests.get(url) + url = res.content.decode('utf-8') + res.close() + return url + + + +res = requests.get(root_url) +body = BeautifulSoup(str(res.content), "html.parser") +res.close() +new_music_count = 0 +for item in body.find_all("div", {"class", "tools"}): + json_payload=item["data-music"].replace("'","").replace("\\","") + if not json_payload.endswith("}"): + json_payload += '", "pay":""}' + music_data = json.loads(json_payload) + music = Music(music_data["id"], convertToUtf8(music_data["artist"]) if "artist" in music_data else "", convertToUtf8(music_data["album"]) if "album" in music_data else "", convertToUtf8(music_data["name"]) if "name" in music_data else "", music_data["pay"]) + # print(music.name) + # print(music.artist) + # print(music.album) + # print(music.id) + # print(music.pay) + music.file_name = music_name_template.format(name=music.name, artist=music.artist, album=music.album, id= music.id.replace("MUSIC_", "")) + music.targeting_file = "{}/{}".format(destination_folder, music.file_name) + if music.exists(): + # print("[Info] Skipping the music {}".format(music.targeting_file)) + continue + new_music_count +=1 + music.url = getMp3Url(mp3_url_template.format(music.id)) + try: + music.download() + except: + print("error") + print("[Info] Download music \"{}\" with the status: {}".format(music.file_name, music.status)) +print("[Summary] Found {} new musics".format(new_music_count)) + +if new_music_count > 0: + print("Processing the music") + MusicPatcher().process_music() +else: + print("No need processing") \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2bf92c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +bin +include +lib +lib64 +pyvenv.cfg +share +**/@eaDir +**/__pycache__ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a16f369 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +from python:slim + +ARG MUSIC_FOLDER_ARG=/nas/music +ARG MUSIC_URL_ARG=https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C + +ENV MUSIC_URL=${MUSIC_URL_ARG} +ENV MUSIC_FOLDER=${MUSIC_FOLDER_ARG} + +COPY requirements.txt / +RUN pip install -r requirements.txt +COPY src/* / +COPY start.sh / +RUN chmod +x /start.sh +ENTRYPOINT ["/start.sh"] + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..36ff8cf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +beautifulsoup4==4.9.3 +bs4==0.0.1 +certifi==2021.5.30 +chardet==4.0.0 +idna==2.10 +psycopg2-binary==2.9.1 +requests==2.25.1 +soupsieve==2.2.1 +urllib3==1.26.5 diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..c4dfd14 --- /dev/null +++ b/src/main.py @@ -0,0 +1,103 @@ +import requests +from bs4 import BeautifulSoup +import re +from urllib.parse import unquote +import os +import time +import glob +from dataclasses import dataclass +import json +from io import StringIO +from contextlib import redirect_stdout +from patch import MusicPatcher +import string + +mp3_url_template = "https://antiserver.kuwo.cn/anti.s?type=convert_url&rid={}&format=mp3&response=url" +music_name_template = "{name}_{artist}_{album}_{id}.mp3" + +destination_folder = os.getenv('MUSIC_FOLDER', "/nas/music") +root_url = os.getenv("MUSIC_URL", "https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C") +@dataclass +class Music: + def __init__(self, id: str, artist: str, album: str, name: str, pay:str): + self.artist = artist + self.album = album + self.id = id + self.name = name + self.pay = pay + self.url = "" + self.targeting_file = "" + self.status ="pending" + self.file_name = "" + + def exists(self): + return os.path.exists(self.targeting_file) + + def download(self): + print("Checking url " + self.url) + r = requests.get(self.url, verify=False, timeout=5) + if r.status_code == 200: + with open(self.targeting_file, 'wb') as f: + f.write(r.content) + self.status = "success" + else: + self.status = "fail" + r.close() + return self.status + +def convertToUtf8(str): + f = StringIO() + res = "" + for i, ch in enumerate(str): + if ch =='x' and len(str) > i+2 and all(c in string.hexdigits for c in str[i+1: i+3]): + res += "\\x" + else: + res += ch + with redirect_stdout(f): + exec('print(b\''+ res + '\'.decode(\'utf-8\'))') + # print(f.getvalue()) + res = f.getvalue().replace("\n","") + return res + +def getMp3Url(url): + res = requests.get(url) + url = res.content.decode('utf-8') + res.close() + return url + + + +res = requests.get(root_url) +body = BeautifulSoup(str(res.content), "html.parser") +res.close() +new_music_count = 0 +for item in body.find_all("div", {"class", "tools"}): + json_payload=item["data-music"].replace("'","").replace("\\","") + if not json_payload.endswith("}"): + json_payload += '", "pay":""}' + music_data = json.loads(json_payload) + music = Music(music_data["id"], convertToUtf8(music_data["artist"]) if "artist" in music_data else "", convertToUtf8(music_data["album"]) if "album" in music_data else "", convertToUtf8(music_data["name"]) if "name" in music_data else "", music_data["pay"]) + # print(music.name) + # print(music.artist) + # print(music.album) + # print(music.id) + # print(music.pay) + music.file_name = music_name_template.format(name=music.name, artist=music.artist, album=music.album, id= music.id.replace("MUSIC_", "")) + music.targeting_file = "{}/{}".format(destination_folder, music.file_name) + if music.exists(): + # print("[Info] Skipping the music {}".format(music.targeting_file)) + continue + new_music_count +=1 + music.url = getMp3Url(mp3_url_template.format(music.id)) + try: + music.download() + except: + print("error") + print("[Info] Download music \"{}\" with the status: {}".format(music.file_name, music.status)) +print("[Summary] Found {} new musics".format(new_music_count)) + +if new_music_count > 0: + print("Processing the music") + MusicPatcher().process_music() +else: + print("No need processing") \ No newline at end of file diff --git a/src/patch.py b/src/patch.py new file mode 100644 index 0000000..22361ed --- /dev/null +++ b/src/patch.py @@ -0,0 +1,60 @@ +import psycopg2 + +class MusicPatcher: + connection = False + def __init__(self): + self.connection = psycopg2.connect(user="postgres", + port="5432", + database="mediaserver") + self.connection.set_client_encoding('UTF8') + + def __update(self, sql, params=()): + cur = self.connection.cursor() + cur.execute(sql, params) + updated_rows = cur.rowcount + print("{} row changed for {}".format(updated_rows , sql)) + self.connection.commit() + cur.close() + + def __select(self, sql, params=()): + cur = self.connection.cursor() + cur.execute(sql, params) + rows = cur.fetchall() + cur.close() + return rows + def __update_music_metadata(self, id, title, artist, album,album_artist, orginal_info): + sql = """ UPDATE MUSIC + SET title = %s, artist= %s, album= %s, album_artist= %s, comment= %s + WHERE id = %s""" + # execute the UPDATE statement + self.__update(self,sql,(title, artist, album, album_artist, orginal_info, id)) + def close(self): + if self.connection: + self.connection.close() + + def process_music(self): + postgreSQL_select_Query = "SELECT ID,path FROM track where comment=''" + track_records = self.__select(postgreSQL_select_Query) + for track_record in track_records: + path_tokens = track_record[1].split("/") + tokens =path_tokens[len(path_tokens)-1].replace(".mp3","").split("_") + if len(tokens) != 4: + continue + artist_tracks = self.__select("SELECT track from artist_track where track = {}".format(track_record[0])) + if(len(artist_tracks) ==0): + sql_query = "INSERT INTO artist_track (track, artist, artist_sort, artist_search, has_album_artist) values ({},'{}','{}','{}','f')".format(track_record[0],tokens[1],tokens[1],tokens[1]) + self.__update(sql_query) + else: + sql_query = "UPDATE artist_track set artist='{}', artist_sort='{}', artist_search='{}' where track = {}".format(tokens[1],tokens[1],tokens[1],track_record[0]) + self.__update(sql_query) + + album_tracks = self.__select("SELECT * from album_track where track = {}".format(track_record[0])) + if(len(album_tracks) ==0): + sql_query = "INSERT INTO album_track (track, album, album_sort, album_search, album_artist, album_artist_sort, album_artist_search,from_album_artist) values ({},'{}','{}','{}','{}','{}','{}','t')".format(track_record[0],tokens[2],tokens[2],tokens[2],tokens[1],tokens[1],tokens[1]) + self.__update(sql_query) + else: + sql_query = "UPDATE album_track set album='{}', album_sort='{}', album_search='{}', album_artist='{}',album_artist_sort='{}', album_artist_search='{}' where track = {}".format(tokens[2],tokens[2],tokens[2],tokens[1],tokens[1],tokens[1],track_record[0]) + self.__update(sql_query) + + sql_query = "UPDATE track set title='{}' , title_search='{}', comment={} where id={}".format(tokens[0],tokens[0],tokens[3],track_record[0]) + self.__update(sql_query) \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2bf92c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +bin +include +lib +lib64 +pyvenv.cfg +share +**/@eaDir +**/__pycache__ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a16f369 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +from python:slim + +ARG MUSIC_FOLDER_ARG=/nas/music +ARG MUSIC_URL_ARG=https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C + +ENV MUSIC_URL=${MUSIC_URL_ARG} +ENV MUSIC_FOLDER=${MUSIC_FOLDER_ARG} + +COPY requirements.txt / +RUN pip install -r requirements.txt +COPY src/* / +COPY start.sh / +RUN chmod +x /start.sh +ENTRYPOINT ["/start.sh"] + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..36ff8cf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +beautifulsoup4==4.9.3 +bs4==0.0.1 +certifi==2021.5.30 +chardet==4.0.0 +idna==2.10 +psycopg2-binary==2.9.1 +requests==2.25.1 +soupsieve==2.2.1 +urllib3==1.26.5 diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..c4dfd14 --- /dev/null +++ b/src/main.py @@ -0,0 +1,103 @@ +import requests +from bs4 import BeautifulSoup +import re +from urllib.parse import unquote +import os +import time +import glob +from dataclasses import dataclass +import json +from io import StringIO +from contextlib import redirect_stdout +from patch import MusicPatcher +import string + +mp3_url_template = "https://antiserver.kuwo.cn/anti.s?type=convert_url&rid={}&format=mp3&response=url" +music_name_template = "{name}_{artist}_{album}_{id}.mp3" + +destination_folder = os.getenv('MUSIC_FOLDER', "/nas/music") +root_url = os.getenv("MUSIC_URL", "https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C") +@dataclass +class Music: + def __init__(self, id: str, artist: str, album: str, name: str, pay:str): + self.artist = artist + self.album = album + self.id = id + self.name = name + self.pay = pay + self.url = "" + self.targeting_file = "" + self.status ="pending" + self.file_name = "" + + def exists(self): + return os.path.exists(self.targeting_file) + + def download(self): + print("Checking url " + self.url) + r = requests.get(self.url, verify=False, timeout=5) + if r.status_code == 200: + with open(self.targeting_file, 'wb') as f: + f.write(r.content) + self.status = "success" + else: + self.status = "fail" + r.close() + return self.status + +def convertToUtf8(str): + f = StringIO() + res = "" + for i, ch in enumerate(str): + if ch =='x' and len(str) > i+2 and all(c in string.hexdigits for c in str[i+1: i+3]): + res += "\\x" + else: + res += ch + with redirect_stdout(f): + exec('print(b\''+ res + '\'.decode(\'utf-8\'))') + # print(f.getvalue()) + res = f.getvalue().replace("\n","") + return res + +def getMp3Url(url): + res = requests.get(url) + url = res.content.decode('utf-8') + res.close() + return url + + + +res = requests.get(root_url) +body = BeautifulSoup(str(res.content), "html.parser") +res.close() +new_music_count = 0 +for item in body.find_all("div", {"class", "tools"}): + json_payload=item["data-music"].replace("'","").replace("\\","") + if not json_payload.endswith("}"): + json_payload += '", "pay":""}' + music_data = json.loads(json_payload) + music = Music(music_data["id"], convertToUtf8(music_data["artist"]) if "artist" in music_data else "", convertToUtf8(music_data["album"]) if "album" in music_data else "", convertToUtf8(music_data["name"]) if "name" in music_data else "", music_data["pay"]) + # print(music.name) + # print(music.artist) + # print(music.album) + # print(music.id) + # print(music.pay) + music.file_name = music_name_template.format(name=music.name, artist=music.artist, album=music.album, id= music.id.replace("MUSIC_", "")) + music.targeting_file = "{}/{}".format(destination_folder, music.file_name) + if music.exists(): + # print("[Info] Skipping the music {}".format(music.targeting_file)) + continue + new_music_count +=1 + music.url = getMp3Url(mp3_url_template.format(music.id)) + try: + music.download() + except: + print("error") + print("[Info] Download music \"{}\" with the status: {}".format(music.file_name, music.status)) +print("[Summary] Found {} new musics".format(new_music_count)) + +if new_music_count > 0: + print("Processing the music") + MusicPatcher().process_music() +else: + print("No need processing") \ No newline at end of file diff --git a/src/patch.py b/src/patch.py new file mode 100644 index 0000000..22361ed --- /dev/null +++ b/src/patch.py @@ -0,0 +1,60 @@ +import psycopg2 + +class MusicPatcher: + connection = False + def __init__(self): + self.connection = psycopg2.connect(user="postgres", + port="5432", + database="mediaserver") + self.connection.set_client_encoding('UTF8') + + def __update(self, sql, params=()): + cur = self.connection.cursor() + cur.execute(sql, params) + updated_rows = cur.rowcount + print("{} row changed for {}".format(updated_rows , sql)) + self.connection.commit() + cur.close() + + def __select(self, sql, params=()): + cur = self.connection.cursor() + cur.execute(sql, params) + rows = cur.fetchall() + cur.close() + return rows + def __update_music_metadata(self, id, title, artist, album,album_artist, orginal_info): + sql = """ UPDATE MUSIC + SET title = %s, artist= %s, album= %s, album_artist= %s, comment= %s + WHERE id = %s""" + # execute the UPDATE statement + self.__update(self,sql,(title, artist, album, album_artist, orginal_info, id)) + def close(self): + if self.connection: + self.connection.close() + + def process_music(self): + postgreSQL_select_Query = "SELECT ID,path FROM track where comment=''" + track_records = self.__select(postgreSQL_select_Query) + for track_record in track_records: + path_tokens = track_record[1].split("/") + tokens =path_tokens[len(path_tokens)-1].replace(".mp3","").split("_") + if len(tokens) != 4: + continue + artist_tracks = self.__select("SELECT track from artist_track where track = {}".format(track_record[0])) + if(len(artist_tracks) ==0): + sql_query = "INSERT INTO artist_track (track, artist, artist_sort, artist_search, has_album_artist) values ({},'{}','{}','{}','f')".format(track_record[0],tokens[1],tokens[1],tokens[1]) + self.__update(sql_query) + else: + sql_query = "UPDATE artist_track set artist='{}', artist_sort='{}', artist_search='{}' where track = {}".format(tokens[1],tokens[1],tokens[1],track_record[0]) + self.__update(sql_query) + + album_tracks = self.__select("SELECT * from album_track where track = {}".format(track_record[0])) + if(len(album_tracks) ==0): + sql_query = "INSERT INTO album_track (track, album, album_sort, album_search, album_artist, album_artist_sort, album_artist_search,from_album_artist) values ({},'{}','{}','{}','{}','{}','{}','t')".format(track_record[0],tokens[2],tokens[2],tokens[2],tokens[1],tokens[1],tokens[1]) + self.__update(sql_query) + else: + sql_query = "UPDATE album_track set album='{}', album_sort='{}', album_search='{}', album_artist='{}',album_artist_sort='{}', album_artist_search='{}' where track = {}".format(tokens[2],tokens[2],tokens[2],tokens[1],tokens[1],tokens[1],track_record[0]) + self.__update(sql_query) + + sql_query = "UPDATE track set title='{}' , title_search='{}', comment={} where id={}".format(tokens[0],tokens[0],tokens[3],track_record[0]) + self.__update(sql_query) \ No newline at end of file diff --git a/src/test.py b/src/test.py new file mode 100644 index 0000000..facb1aa --- /dev/null +++ b/src/test.py @@ -0,0 +1,11 @@ +import string + +str = "xe7xbdxaaxe6x81xb6xe7x8ex8bxe5x86xa0/xe7x94xb2xe9x93x81xe5x9fx8exefxbcx88xe5xbex90xe6xa2xa6xe5x9cx86Remixxefxbcx89" +res = "" +for i, ch in enumerate(str): + if ch =='x' and len(str) > i+2 and all(c in string.hexdigits for c in str[i+1: i+3]): + res += "\\x" + else: + res += ch + +print(res) \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2bf92c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +bin +include +lib +lib64 +pyvenv.cfg +share +**/@eaDir +**/__pycache__ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a16f369 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +from python:slim + +ARG MUSIC_FOLDER_ARG=/nas/music +ARG MUSIC_URL_ARG=https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C + +ENV MUSIC_URL=${MUSIC_URL_ARG} +ENV MUSIC_FOLDER=${MUSIC_FOLDER_ARG} + +COPY requirements.txt / +RUN pip install -r requirements.txt +COPY src/* / +COPY start.sh / +RUN chmod +x /start.sh +ENTRYPOINT ["/start.sh"] + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..36ff8cf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +beautifulsoup4==4.9.3 +bs4==0.0.1 +certifi==2021.5.30 +chardet==4.0.0 +idna==2.10 +psycopg2-binary==2.9.1 +requests==2.25.1 +soupsieve==2.2.1 +urllib3==1.26.5 diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..c4dfd14 --- /dev/null +++ b/src/main.py @@ -0,0 +1,103 @@ +import requests +from bs4 import BeautifulSoup +import re +from urllib.parse import unquote +import os +import time +import glob +from dataclasses import dataclass +import json +from io import StringIO +from contextlib import redirect_stdout +from patch import MusicPatcher +import string + +mp3_url_template = "https://antiserver.kuwo.cn/anti.s?type=convert_url&rid={}&format=mp3&response=url" +music_name_template = "{name}_{artist}_{album}_{id}.mp3" + +destination_folder = os.getenv('MUSIC_FOLDER', "/nas/music") +root_url = os.getenv("MUSIC_URL", "https://www.kuwo.cn/bang/content?name=%E6%8A%96%E9%9F%B3%E7%83%AD%E6%AD%8C%E6%A6%9C") +@dataclass +class Music: + def __init__(self, id: str, artist: str, album: str, name: str, pay:str): + self.artist = artist + self.album = album + self.id = id + self.name = name + self.pay = pay + self.url = "" + self.targeting_file = "" + self.status ="pending" + self.file_name = "" + + def exists(self): + return os.path.exists(self.targeting_file) + + def download(self): + print("Checking url " + self.url) + r = requests.get(self.url, verify=False, timeout=5) + if r.status_code == 200: + with open(self.targeting_file, 'wb') as f: + f.write(r.content) + self.status = "success" + else: + self.status = "fail" + r.close() + return self.status + +def convertToUtf8(str): + f = StringIO() + res = "" + for i, ch in enumerate(str): + if ch =='x' and len(str) > i+2 and all(c in string.hexdigits for c in str[i+1: i+3]): + res += "\\x" + else: + res += ch + with redirect_stdout(f): + exec('print(b\''+ res + '\'.decode(\'utf-8\'))') + # print(f.getvalue()) + res = f.getvalue().replace("\n","") + return res + +def getMp3Url(url): + res = requests.get(url) + url = res.content.decode('utf-8') + res.close() + return url + + + +res = requests.get(root_url) +body = BeautifulSoup(str(res.content), "html.parser") +res.close() +new_music_count = 0 +for item in body.find_all("div", {"class", "tools"}): + json_payload=item["data-music"].replace("'","").replace("\\","") + if not json_payload.endswith("}"): + json_payload += '", "pay":""}' + music_data = json.loads(json_payload) + music = Music(music_data["id"], convertToUtf8(music_data["artist"]) if "artist" in music_data else "", convertToUtf8(music_data["album"]) if "album" in music_data else "", convertToUtf8(music_data["name"]) if "name" in music_data else "", music_data["pay"]) + # print(music.name) + # print(music.artist) + # print(music.album) + # print(music.id) + # print(music.pay) + music.file_name = music_name_template.format(name=music.name, artist=music.artist, album=music.album, id= music.id.replace("MUSIC_", "")) + music.targeting_file = "{}/{}".format(destination_folder, music.file_name) + if music.exists(): + # print("[Info] Skipping the music {}".format(music.targeting_file)) + continue + new_music_count +=1 + music.url = getMp3Url(mp3_url_template.format(music.id)) + try: + music.download() + except: + print("error") + print("[Info] Download music \"{}\" with the status: {}".format(music.file_name, music.status)) +print("[Summary] Found {} new musics".format(new_music_count)) + +if new_music_count > 0: + print("Processing the music") + MusicPatcher().process_music() +else: + print("No need processing") \ No newline at end of file diff --git a/src/patch.py b/src/patch.py new file mode 100644 index 0000000..22361ed --- /dev/null +++ b/src/patch.py @@ -0,0 +1,60 @@ +import psycopg2 + +class MusicPatcher: + connection = False + def __init__(self): + self.connection = psycopg2.connect(user="postgres", + port="5432", + database="mediaserver") + self.connection.set_client_encoding('UTF8') + + def __update(self, sql, params=()): + cur = self.connection.cursor() + cur.execute(sql, params) + updated_rows = cur.rowcount + print("{} row changed for {}".format(updated_rows , sql)) + self.connection.commit() + cur.close() + + def __select(self, sql, params=()): + cur = self.connection.cursor() + cur.execute(sql, params) + rows = cur.fetchall() + cur.close() + return rows + def __update_music_metadata(self, id, title, artist, album,album_artist, orginal_info): + sql = """ UPDATE MUSIC + SET title = %s, artist= %s, album= %s, album_artist= %s, comment= %s + WHERE id = %s""" + # execute the UPDATE statement + self.__update(self,sql,(title, artist, album, album_artist, orginal_info, id)) + def close(self): + if self.connection: + self.connection.close() + + def process_music(self): + postgreSQL_select_Query = "SELECT ID,path FROM track where comment=''" + track_records = self.__select(postgreSQL_select_Query) + for track_record in track_records: + path_tokens = track_record[1].split("/") + tokens =path_tokens[len(path_tokens)-1].replace(".mp3","").split("_") + if len(tokens) != 4: + continue + artist_tracks = self.__select("SELECT track from artist_track where track = {}".format(track_record[0])) + if(len(artist_tracks) ==0): + sql_query = "INSERT INTO artist_track (track, artist, artist_sort, artist_search, has_album_artist) values ({},'{}','{}','{}','f')".format(track_record[0],tokens[1],tokens[1],tokens[1]) + self.__update(sql_query) + else: + sql_query = "UPDATE artist_track set artist='{}', artist_sort='{}', artist_search='{}' where track = {}".format(tokens[1],tokens[1],tokens[1],track_record[0]) + self.__update(sql_query) + + album_tracks = self.__select("SELECT * from album_track where track = {}".format(track_record[0])) + if(len(album_tracks) ==0): + sql_query = "INSERT INTO album_track (track, album, album_sort, album_search, album_artist, album_artist_sort, album_artist_search,from_album_artist) values ({},'{}','{}','{}','{}','{}','{}','t')".format(track_record[0],tokens[2],tokens[2],tokens[2],tokens[1],tokens[1],tokens[1]) + self.__update(sql_query) + else: + sql_query = "UPDATE album_track set album='{}', album_sort='{}', album_search='{}', album_artist='{}',album_artist_sort='{}', album_artist_search='{}' where track = {}".format(tokens[2],tokens[2],tokens[2],tokens[1],tokens[1],tokens[1],track_record[0]) + self.__update(sql_query) + + sql_query = "UPDATE track set title='{}' , title_search='{}', comment={} where id={}".format(tokens[0],tokens[0],tokens[3],track_record[0]) + self.__update(sql_query) \ No newline at end of file diff --git a/src/test.py b/src/test.py new file mode 100644 index 0000000..facb1aa --- /dev/null +++ b/src/test.py @@ -0,0 +1,11 @@ +import string + +str = "xe7xbdxaaxe6x81xb6xe7x8ex8bxe5x86xa0/xe7x94xb2xe9x93x81xe5x9fx8exefxbcx88xe5xbex90xe6xa2xa6xe5x9cx86Remixxefxbcx89" +res = "" +for i, ch in enumerate(str): + if ch =='x' and len(str) > i+2 and all(c in string.hexdigits for c in str[i+1: i+3]): + res += "\\x" + else: + res += ch + +print(res) \ No newline at end of file diff --git a/start.sh b/start.sh new file mode 100644 index 0000000..a7a92cb --- /dev/null +++ b/start.sh @@ -0,0 +1,10 @@ +#!/bin/sh +while true +do + echo "" + echo "" + echo "Fetching Musics happens at: $(date +"%Y/%m/%d %I:%M:%S")" + echo "#########################################" + python3 /main.py + sleep 1d +done \ No newline at end of file