def init_new_metahuman(self, model_folder: str, scale: float, is_paly_media_audio: bool, enable_random_param: bool, push_stream_url: str) -> bool:
    """
    Initialize the digital human

    model_folder: model folder
    scale: scaling factor; 1 is the original size, 0.5 is half size
    is_paly_media_audio: whether to play the audio contained in the material
    enable_random_param: whether to enable random parameters; randomization covers the audio (excluding the digital human's speaking voice) and the picture (translation, rotation, scaling and motion generalization)
    push_stream_url: push-stream address; the default "" disables streaming; only rtmp is supported, e.g. rtmp://your_server_ip/live/stream
    return: True on success, an error message on failure
    """

# Usage example [Demo]
result = self.init_new_metahuman(r"D:\Project\Aibote\NewHuman\model", 0.5, False, False, "")
print(result)

# Python pull-stream example [Demo]
import cv2

# Stream address
rtmp_url = "rtmp://127.0.0.1/live/stream"

# Create the capture object
cap = cv2.VideoCapture(rtmp_url)

# Check whether the stream opened successfully
if not cap.isOpened():
    exit()

# Read and display video frames
while True:
    ret, frame = cap.read()
    if not ret:
        break
    cv2.imshow('RTMP Stream', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
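# Return-value check (a minimal sketch, not from the original docs): init_new_metahuman
# returns True on success and an error-message string on failure, so compare against True
# before continuing. Paths reuse the placeholder values from the demo above.
ok = self.init_new_metahuman(r"D:\Project\Aibote\NewHuman\model", 0.5, False, False, "")
if ok is True:
    print("digital human initialized")
else:
    # on failure the return value is the error message
    print("init failed:", ok)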
def new_metahuman_switch_action(self, figure_video_path: str, scale: float, is_paly_media_audio: bool, is_swap_color: bool) -> bool:
    """
    Switch the character figure/action (call init_new_metahuman to initialize the digital human first)

    figure_video_path: path of the character video
    scale: scaling factor; 1 is the original size, 0.5 is half size
    is_paly_media_audio: whether to play the audio contained in the material
    is_swap_color: whether to change the base mouth-shape color
    return: True on success, an error message on failure
    """

# Usage example [Demo]
result = self.new_metahuman_switch_action(r"D:\Project\666.mp4", 0.5, False, False)
print(result)
def new_metahuman_add_background(self, bg_path: str) -> bool:
    """
    Add a video/picture background (call init_new_metahuman to initialize the digital human first)

    bg_path: path of the background video/picture
    return: True on success, an error message on failure
    """

# Usage example [Demo]
result = self.new_metahuman_add_background(r"D:\Project\1.mp4")
print(result)
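# Combined sketch (assembled from the calls documented above; not part of the original docs):
# initialize the digital human first, then switch the figure/action and add a background.
# All paths are the placeholder values from the individual demos.
result = self.init_new_metahuman(r"D:\Project\Aibote\NewHuman\model", 0.5, False, False, "")
if result is True:
    self.new_metahuman_switch_action(r"D:\Project\666.mp4", 0.5, False, False)
    self.new_metahuman_add_background(r"D:\Project\1.mp4")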
def new_metahuman_del_background(self) -> bool:
    """
    Delete the video/picture background

    return: True on success, an error message on failure
    """

# Usage example [Demo]
result = self.new_metahuman_del_background()
print(result)
def new_metahuman_generate_human_video(self, audio_path: str) -> bool:
    """
    Generate a digital human video

    audio_path: audio path; the matching lab file must be generated in advance
    return: True on success, and a digital human video file with the same name and a .mp4 suffix is written to the same directory as audio_path; an error message on failure
    """

# Usage example [Demo]
result = self.new_metahuman_generate_human_video(r"D:\Project\888.wav")
print(result)
def new_metahuman_audio_to_lab(self, server_ip: str, audio_path: str) -> bool:
    """
    Generate a lab file

    server_ip: IP of the lab server
    audio_path: audio file path
    return: True on success, and a lab file with the same name and a .lab suffix is written to the same directory as audio_path; an error message on failure
    """

# Usage example [Demo]
result = self.new_metahuman_audio_to_lab("127.0.0.1", r"D:\Project\888.wav")
print(result)
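# Combined sketch (assembled from the calls documented above; not part of the original docs):
# new_metahuman_generate_human_video requires the lab file to exist in advance, so generate
# it first with new_metahuman_audio_to_lab. The server IP and audio path are placeholders.
audio = r"D:\Project\888.wav"
result = self.new_metahuman_audio_to_lab("127.0.0.1", audio)
if result is True:
    # writes D:\Project\888.mp4 next to the audio file on success
    result = self.new_metahuman_generate_human_video(audio)
print(result)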
def new_metahuman_text_to_audio(self, server_ip: str, save_audio_path: str, refer_audio_path: str, refer_text: str, text: str, speed_factor: int) -> bool:
    """
    Text to audio

    server_ip: IP of the voice-cloning server
    save_audio_path: path of the audio file to save
    refer_audio_path: path of the reference audio
    refer_text: reference text (must match what is spoken in the reference audio)
    text: text to synthesize
    speed_factor: speech speed; 1 (the default) is normal speed
    return: True on success, an error message on failure
    """

# Usage example [Demo]
result = self.new_metahuman_text_to_audio("127.0.0.1", r"D:\Project\888.wav", r"D:\Project\666.wav", "我是参考音频中的文本内容", "我是一个实际生成的文本内容", 1)
print(result)
def new_metahuman_audio_to_text(self, server_ip: str, audio_path: str) -> str:
    """
    Speech to text

    server_ip: IP of the speech-recognition server
    audio_path: audio file path
    return: the recognized text on success, "None" on failure
    """

# Usage example [Demo]
result = self.new_metahuman_audio_to_text("127.0.0.1", r"D:\Project\888.wav")
print(result)
def new_metahuman_human_speak(self, audio_path: str, wait_play_sound: bool) -> bool:
    """
    Make the digital human speak

    audio_path: audio path; the matching lab file must be generated in advance
    wait_play_sound: whether to wait until playback finishes
    return: True on success, an error message on failure
    """

# Usage example [Demo]
result = self.new_metahuman_human_speak(r"D:\Project\888.wav", True)
print(result)
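# Combined sketch (assembled from the calls documented above; not part of the original docs):
# synthesize audio from text, generate its lab file, then have the digital human speak it.
# Server IPs, reference audio/text and paths are the placeholder values from the demos above.
audio = r"D:\Project\888.wav"
ok = self.new_metahuman_text_to_audio("127.0.0.1", audio, r"D:\Project\666.wav",
                                      "我是参考音频中的文本内容", "我是一个实际生成的文本内容", 1)
if ok is True:
    ok = self.new_metahuman_audio_to_lab("127.0.0.1", audio)
if ok is True:
    # wait until playback finishes before returning
    ok = self.new_metahuman_human_speak(audio, True)
print(ok)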
def new_metahuman_stop_speak(self) -> bool:
    """
    Interrupt speech

    return: True
    """

# Usage example [Demo]
result = self.new_metahuman_stop_speak()
print(result)
def new_metahuman_start_record(self, save_audio_path: str) -> bool:
    """
    Record from the microphone

    save_audio_path: path of the audio file to save; call new_metahuman_stop_record to end the recording and save it
    return: True
    """

# Usage example [Demo]
result = self.new_metahuman_start_record(r"D:\Project\888.wav")
print(result)
def new_metahuman_stop_record(self) -> bool:
    """
    Stop recording

    return: True
    """

# Usage example [Demo]
result = self.new_metahuman_stop_record()
print(result)
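# Combined sketch (assembled from the calls documented above; not part of the original docs):
# record the microphone for a few seconds, stop the recording, then transcribe the saved file.
# The 5-second duration, server IP and path are illustrative placeholders.
import time

audio = r"D:\Project\888.wav"
self.new_metahuman_start_record(audio)
time.sleep(5)
self.new_metahuman_stop_record()
text = self.new_metahuman_audio_to_text("127.0.0.1", audio)
print(text)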
def new_get_face_data(self, server_ip: str, call_api_key: str, video_path: str) -> bool:
    """
    Get face data

    server_ip: server IP
    call_api_key: call key; to obtain the key and IP, contact QQ: 2766463939
    video_path: path of the face video
    return: True on success, and a face data file with a .pt suffix is written to the same directory as video_path; an error message on failure
    """

# Usage example [Demo]
result = self.new_get_face_data("82.68.89.156", "yNjEtNDM5Mi04OGZmLTU2NzBmYjgwYTAxMAACKlszLCIwZDYyNTc0MC0yN", r"D:\Project\888.mp4")
print(result)
def new_metahuman_generate_human_video_ex(self, server_ip: str, call_api_key: str, audio_path: str, video_path: str, save_video_path: str) -> bool:
    """
    Generate a short video with cloud computing power

    server_ip: server IP
    call_api_key: call key; to obtain the key and IP, contact QQ: 2766463939
    audio_path: path of the audio that drives the mouth shapes
    video_path: path of the original video
    save_video_path: path where the composited result is saved
    return: True on success, an error message on failure
    """

# Usage example [Demo]
result = self.new_metahuman_generate_human_video_ex("82.68.89.156", "yNjEtNDM5Mi04OGZmLTU2NzBmYjgwYTAxMAACKlszLCIwZDYyNTc0MC0yN", r"D:\Project\888.wav", r"D:\Project\888.mp4", r"D:\Project")
print(result)
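# Combined sketch (not part of the original docs): extract face data for the source video,
# then generate the short video on the cloud. Whether generate_human_video_ex requires the
# .pt face-data file to exist beforehand is an assumption here; server IP, key and paths are
# the placeholder values from the demos above.
server_ip = "82.68.89.156"
api_key = "yNjEtNDM5Mi04OGZmLTU2NzBmYjgwYTAxMAACKlszLCIwZDYyNTc0MC0yN"
result = self.new_get_face_data(server_ip, api_key, r"D:\Project\888.mp4")
if result is True:
    result = self.new_metahuman_generate_human_video_ex(server_ip, api_key,
                                                        r"D:\Project\888.wav",
                                                        r"D:\Project\888.mp4",
                                                        r"D:\Project")
print(result)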
def new_metahuman_train_voice_ex(self, appid: str, token: str, spk_id: str, refer_audio_path: str) -> bool:
    """
    Train a voice with cloud computing power

    Cloud computing key acquisition address: https://console.volcengine.com/speech/service/9999?AppID=1330699505

    appid: APP ID
    token: Access Token
    spk_id: voice ID
    refer_audio_path: reference audio
    return: True when training succeeds, an error message on failure
    """

# Usage example [Demo]
result = self.new_metahuman_train_voice_ex("1330149505", "jorvx09WGQDYCJFIOalT42TeYlkSEY7A", "S_HRboGJG2r1", r"D:\Project\888.mp4")
print(result)
def new_metahuman_get_train_status_ex(self, appid: str, token: str, spk_id: str) -> str:
    """
    Get the trainVoiceEx training status

    Cloud computing key acquisition address: https://console.volcengine.com/speech/service/9999?AppID=1330699505

    appid: APP ID
    token: Access Token
    spk_id: voice ID
    return: the training status: "Train Success", "NotFound", "Training", "Failed" or "unknow"
    """

# Usage example [Demo]
result = self.new_metahuman_get_train_status_ex("1330149505", "jorvx09WGQDYCJFIOalT42TeYlkSEY7A", "S_HRboGJG2r1")
print(result)
def new_metahuman_text_to_audio_ex(self, appid: str, token: str, spk_id: str, cluster: str, text: str, speed_ratio: str, save_audio_path: str) -> bool:
    """
    Text to speech with cloud computing power

    Cloud computing key acquisition address: https://console.volcengine.com/speech/service/9999?AppID=1330699505

    appid: APP ID
    token: Access Token
    spk_id: voice ID
    cluster: cluster ID; voice-cloning large model: "volcano_icl", speech-synthesis large model: "volcano_tts"
    text: text to synthesize
    speed_ratio: speech speed; 1 is normal speed
    save_audio_path: path of the audio file to save; a lab file is also generated in the same directory as the audio
    return: True on success, an error message on failure
    """

# Usage example [Demo]
result = self.new_metahuman_text_to_audio_ex("1330149505", "jorvx09WGQDYCJFIOalT42TeYlkSEY7A", "S_HRboGJG2r1", "volcano_icl", "Aibote数字人CPU实时推理嘴型全球第一", 1, r"D:\Project\888.wav")
print(result)
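# Combined sketch (assembled from the calls documented above; not part of the original docs):
# start cloud voice training, poll the training status until it leaves "Training", then
# synthesize speech with the trained voice. Credentials and paths are the placeholder values
# from the demos above; the 10-second polling interval is illustrative.
import time

appid, token, spk_id = "1330149505", "jorvx09WGQDYCJFIOalT42TeYlkSEY7A", "S_HRboGJG2r1"
result = self.new_metahuman_train_voice_ex(appid, token, spk_id, r"D:\Project\888.mp4")
if result is True:
    while True:
        status = self.new_metahuman_get_train_status_ex(appid, token, spk_id)
        if status != "Training":
            break
        time.sleep(10)
    if status == "Train Success":
        self.new_metahuman_text_to_audio_ex(appid, token, spk_id, "volcano_icl",
                                            "Aibote数字人CPU实时推理嘴型全球第一", 1,
                                            r"D:\Project\888.wav")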
def new_metahuman_get_extend_param(self) -> str:
    """
    Get the driver's command-line parameters (excluding ip and port)

    return: the parameters on success, None on failure
    """

# Usage example [Demo]
result = self.new_metahuman_get_extend_param()
print(result)
def new_metahuman_get_driver_folder(self) -> str:
    """
    Get the folder path where the driver is located

    return: the folder path containing AiDriver.exe
    """

# Usage example [Demo]
result = self.new_metahuman_get_driver_folder()
print(result)
def new_metahuman_close_driver(self) -> bool:
    """
    Close the driver

    return: True
    """

# Usage example [Demo]
result = self.new_metahuman_close_driver()
print(result)
def new_metahuman_insert_video(self, video_path: str) -> bool:
    """
    Insert a close-up video

    video_path: the video to play
    return: True on success, an error message on failure
    """

# Usage example [Demo]
result = self.new_metahuman_insert_video(r"D:\Project\888.mp4")
print(result)