python实现接入百度语音识别,文本转语音sdk,selenium,AI伙伴爬虫

文本效果:

画图效果

百度语音api参数官网申请应用 https://console.bce.baidu.com/

申请应用后,将得到的 APP_ID、API_KEY、SECRET_KEY 填入 all_func 代码中对应的变量

领取使用次数

selenium驱动下载 https://registry.npmmirror.com/binary.html?path=chromedriver/

要下载与电脑上 Chrome 浏览器版本一致(至少主版本号相同)的 chromedriver

下载以后,将代码中的驱动路径 "D:\chromedriver4.exe" 修改为本机 chromedriver 的实际路径

pip相关库

pygame:
- 用途：pygame是一个用于开发2D游戏的Python库，但也可用于音频播放和处理。
- 安装命令：pip install pygame
speech_recognition:
- 用途：speech_recognition库用于语音识别，可以将音频转换为文本。
- 安装命令：pip install SpeechRecognition
aip（百度AI开放平台的Python SDK）:
- 用途：aip库是百度AI开放平台的Python SDK，用于访问各种百度AI服务，例如语音合成、语音识别等。
- 安装命令：pip install baidu-aip
selenium:
- 用途：selenium是一个自动化测试工具，也可用于Web数据抓取和网站自动化操作，如模拟用户在网页上的行为。
- 安装命令：pip install selenium
pip install urllib3==1.24.3 chardet==3.0.4 charset_normalizer==2.0.12

代码

import time
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import all_func
import requests
from PIL import Image, ImageTk
import tkinter as tk
from tkinter import BOTH, YES
from tkinter import scrolledtext

def webfull():
    """Open a visible Chrome window for manual login and save the cookies.

    The browser is pointed at the Baidu chat page and kept open for 60
    seconds, during which the user is expected to log in by hand. The
    session cookies are then written to ``cookie.txt`` as a JSON string.

    Any error is printed instead of being raised, and the browser is shut
    down whether or not the run succeeded.
    """
    driver = None
    try:
        option = webdriver.ChromeOptions()
        option.add_argument(
            'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36')

        chrome_driver = r"D:\chromedriver4.exe"
        driver = webdriver.Chrome(executable_path=chrome_driver, chrome_options=option)
        # Make navigator.webdriver report false on every new document.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => false
            })
          """
        })
        driver.get("https://chat.baidu.com/?sourceType=pc_r_backup")
        # Fixed window in which the user logs in manually.
        time.sleep(60)
        cookies = driver.get_cookies()
        # Persist the cookies as a JSON string; the context manager closes
        # the file even if the write fails.
        with open('cookie.txt', 'w') as f1:
            f1.write(json.dumps(cookies))
    except Exception as e:
        print(e)
    finally:
        # Always end the browser session, including on failure.
        if driver is not None:
            driver.quit()


def webfull2():
    """Run an interactive chat loop against the Baidu chat page in headless Chrome.

    The cookies stored in ``cookie.txt`` are loaded into the browser, then
    the function loops forever: it reads a confirmed line from the console,
    submits it to the page, waits for the reply and presents it.

    * Input that does not contain ``帮我画`` is treated as a text request:
      every reply fragment is printed, spoken through ``all_func`` and the
      whole reply is shown in a text popup.
    * Input containing ``帮我画`` is treated as a drawing request: the
      generated image is downloaded to ``local_image.jpg`` and displayed.

    Errors are printed instead of being raised, and the browser is shut
    down when the loop ends for any reason (including Ctrl-C).
    """
    # Local import: the exception type is only needed by this function.
    from selenium.common.exceptions import TimeoutException

    driver = None
    try:
        option = webdriver.ChromeOptions()
        option.add_argument('--headless')
        option.add_argument(
            'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36')

        chrome_driver = r"D:\chromedriver4.exe"
        driver = webdriver.Chrome(executable_path=chrome_driver, chrome_options=option)
        # Make navigator.webdriver report false on every new document.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => false
            })
          """
        })
        driver.get("https://chat.baidu.com/?sourceType=pc_r_backup")
        # driver.get("https://yiyan.baidu.com/")
        # Load the cookies saved earlier and attach them to this session.
        with open('cookie.txt', 'rb') as f1:
            cookie_list = json.load(f1)
        for c in cookie_list:
            driver.add_cookie(c)

        # Reload so the page picks up the cookies that were just added.
        driver.refresh()
        # Locate the chat input box by its class name.
        text_input = driver.find_element_by_class_name('text-input-textarea_5vlNp')
        print("画图则输入`帮我画xxx`")
        # Each wait attempt gives the reply up to 60 seconds to appear.
        wait = WebDriverWait(driver, 60)
        # Index of the current reply among the reply containers on the page.
        num = 0
        while True:
            # Keep asking until a non-empty line has been confirmed with "y".
            # Voice input alternative: call all_func.rec() and then use
            # input_text = all_func.listen() instead of input().
            while True:
                input_text = input("输入内容:")
                if not input_text:
                    continue
                y_or_no = input("是否确认(y/n):")
                if y_or_no.lower() == 'y':
                    break
            text_input.send_keys(input_text)
            text_input.send_keys(Keys.RETURN)
            print("回复中----------------------------------------------------")
            time.sleep(5)
            while True:
                try:
                    # The "retry generate" element is used as the signal that
                    # the reply has finished.
                    wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'retry-generate_2vfiC')))
                    break
                except TimeoutException:
                    # Not there yet: pause briefly and wait again. Any other
                    # error propagates to the outer handler instead of
                    # spinning here forever.
                    time.sleep(1)
            substring = "帮我画"
            if substring not in input_text:
                # Text reply: take the reply container for this turn and
                # walk its direct children.
                elements = driver.find_elements_by_class_name('place-holder_3Lnph')
                element = elements[num]
                child_elements = element.find_elements_by_xpath('./*')
                parts = []
                for child in child_elements:
                    res_txt = child.text.replace("1。", "")
                    print(res_txt)
                    # A literal backslash-n marker is appended here;
                    # show_text_popup converts it into a real line break.
                    parts.append(res_txt + "\\n")
                    # Skip speech for blank fragments: there is nothing to
                    # synthesize, and play() would replay the old audio.mp3.
                    if res_txt.strip():
                        all_func.speak(res_txt)
                        all_func.play()
                show_text_popup("".join(parts))
            else:
                try:
                    elements = driver.find_elements_by_class_name('markdown-container_3rJ6c')
                    element = elements[num]
                    # The generated picture is the <img> inside the container.
                    img_element = element.find_element_by_tag_name('img')
                    res_imgurl = img_element.get_attribute('src')
                    # Download with a timeout so a stalled server cannot hang
                    # the session, and reject HTTP error responses.
                    response = requests.get(res_imgurl, timeout=30)
                    response.raise_for_status()
                    with open("local_image.jpg", 'wb') as file:
                        file.write(response.content)
                    print("已画好并存入本地!!!")
                    display_image()
                except Exception as e:
                    print("绘画失败!!!")
                    print(e)
            num += 1
            # NOTE(review): the original comment says this clears the input
            # box, but sending an empty string types nothing; presumably the
            # page clears the box itself after submit - confirm.
            text_input.send_keys('')
            print("---------------------------------------------------------")
    except Exception as e:
        print(e)
    finally:
        # Always end the browser session, including on failure or Ctrl-C.
        if driver is not None:
            driver.quit()

def close_window():
    """Destroy the Tk window held in the module-level ``root`` variable."""
    root.destroy()
# Show an image in a GUI popup
def display_image(file_path="local_image.jpg"):
    """Display an image file in an always-on-top Tk window.

    The call blocks until the window is closed. The window is stored in the
    module-level ``root`` variable so that ``close_window`` can destroy it.

    Args:
        file_path: Path of the image to show. Defaults to
            ``local_image.jpg``.
    """
    global root
    # Open the file before creating the window so that a missing or
    # unreadable file does not leave an empty window behind.
    image = Image.open(file_path)

    root = tk.Tk()
    root.title("显示本地图片")
    # Keep the popup in front of other windows.
    root.attributes('-topmost', True)

    photo = ImageTk.PhotoImage(image)
    label = tk.Label(root, image=photo)
    # Keep a reference on the widget so the image is not garbage-collected
    # while it is displayed.
    label.image = photo
    label.pack()
    # root.after(5000, close_window)  # uncomment to auto-close after 5 seconds (5000 ms)
    root.mainloop()


# Show text in a popup
def show_text_popup(text):
    """Show ``text`` in a read-only, scrollable, always-on-top Tk window.

    Literal backslash-n markers in ``text`` are turned into real line
    breaks before display. The call blocks until the window is closed.
    """
    window = tk.Tk()
    window.title("文本弹窗")
    window.attributes('-topmost', True)
    # Window size: 600x400.
    window.geometry("600x400")

    # Scrollable text area that fills the window.
    viewer = scrolledtext.ScrolledText(
        window,
        wrap=tk.WORD,
        width=40,
        height=10,
        font=("Helvetica", 12),
    )
    viewer.pack(fill=BOTH, expand=YES)

    # Convert the literal "\n" markers into line breaks, insert the result,
    # then lock the widget against editing.
    rendered = text.replace("\\n", "\n")
    viewer.insert(tk.END, rendered)
    viewer.config(state=tk.DISABLED)

    window.mainloop()



# Entry point: start the chat session (reads the cookies from cookie.txt)
webfull2()
# Run this instead to log in manually and save the cookies to cookie.txt
# webfull()

代码 all_func

import time
import pygame
import speech_recognition as sr
from aip import AipSpeech


# Baidu speech API credentials; create an application at
# https://console.bce.baidu.com/ to obtain them
APP_ID = ''
API_KEY = ''
SECRET_KEY = ''

# Shared AipSpeech client used by listen() and speak()
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


# Record speech from the microphone into a file
def rec(rate=18000):
    """Record one utterance from the microphone into ``recording.wav``.

    Args:
        rate: Sample rate, in Hz, used to capture from the microphone.

    The WAV file is always written at 16000 Hz, whatever the capture rate,
    because ``listen()`` reads this file and declares 16000 Hz to the
    recognition service; writing any other rate would mismatch that value.
    """
    r = sr.Recognizer()
    with sr.Microphone(sample_rate=rate) as source:
        audio = r.listen(source)
    with open("recording.wav", "wb") as f:
        # Resample on write so the file matches the rate listen() reports.
        f.write(audio.get_wav_data(convert_rate=16000))


# Read the recorded file and convert it to text
def listen():
    """Send ``recording.wav`` to the speech recognition service.

    Returns:
        The first recognized text, or an empty string when recognition
        fails (the service response is printed in that case).
    """
    with open('recording.wav', 'rb') as f:
        audio_data = f.read()

    # NOTE(review): dev_pid 1537 presumably selects the Mandarin model -
    # confirm against the Baidu ASR documentation.
    result = client.asr(audio_data, 'wav', 16000, {
        'dev_pid': 1537,
    })
    # Accept only a non-empty candidate list; a missing or empty "result"
    # entry is treated as a failure instead of raising.
    candidates = result.get('result') if isinstance(result, dict) else None
    if candidates:
        result_text = candidates[0]
        print("你说: " + result_text)
        return result_text

    print("语音识别失败：" + str(result))
    return ""


# Text to speech
# Voice ids for 'per': Du Xiaomei=0, Du Xiaoyu=1, Du Xiaoyao=3, Du Yaya=4,
# Du Xiaoyao=5003, Du Xiaolu=5118, Du Xiaotong=110, Du Xiaomeng=111,
# Du Miduo=103, Du Xiaojiao=5, Du Bowen=106
def speak(text=""):
    """Synthesize ``text`` to speech and save it as ``audio.mp3``.

    Args:
        text: The text to synthesize.

    Returns:
        True when ``audio.mp3`` was written, False when the service
        returned an error (the error is printed and the file is left
        untouched).
    """
    # NOTE(review): the positional 1 is presumably the client-type argument
    # of the SDK - confirm against the Baidu TTS documentation.
    result = client.synthesis(text, 'zh', 1, {
        'spd': 4,
        'vol': 5,
        'per': 5118,
    })
    # The SDK returns a dict on failure and the raw audio bytes on success.
    if isinstance(result, dict):
        print("语音合成失败：" + str(result))
        return False
    with open('audio.mp3', 'wb') as f:
        f.write(result)
    return True


# Play the synthesized speech
def play():
    """Play ``audio.mp3`` and block until playback has finished.

    The mixer is initialized for this call only and is always shut down
    afterwards, even when loading or playing the file fails.
    """
    pygame.mixer.init()
    try:
        pygame.mixer.music.load("audio.mp3")
        pygame.mixer.music.play()
        # Poll until the music stream is no longer playing.
        while pygame.mixer.music.get_busy():
            time.sleep(0.1)
    finally:
        pygame.mixer.quit()

# while True:
#     rec()
#     request = listen()
#     speak(request)
#     play()

先运行webfull()方法,在弹出的浏览器中于60秒内完成账号登录,等待浏览器自动关闭,cookie会写入本地的cookie.txt

再执行webfull2()读取本地cookie运行代码

注释以下代码可看到弹出浏览器

option.add_argument('--headless')

一	二	三	四	五	六	日
« 2月
				1	2	3
4	5	6	7	8	9	10
11	12	13	14	15	16	17
18	19	20	21	22	23	24
25	26	27	28	29	30

Technical blog꧂

python实现接入百度语音识别,文本转语音sdk,selenium,AI伙伴爬虫

分类目录

近期文章

站长QQ:2051510479