回答-BFW问答

思路一般是：1) 把“曲子文本”解析成音符序列（音高 + 时值）；2) 生成 MIDI 文件；3) 用合成器（SoundFont）把 MIDI 渲染成 WAV；4) 可选：再转成 MP3。

下面给你两个常用做法：高质量（用 SoundFont 渲染）和纯 Python（快速 demo，不依赖外部合成器）。

方法 A：pretty_midi + FluidSynth + SoundFont（音色更真实）

安装依赖:

pip: pip install pretty_midi pyfluidsynth soundfile pydub

系统：需要安装 FluidSynth（pyfluidsynth 调用它）

macOS: brew install fluid-synth

Ubuntu/Debian: sudo apt-get install fluidsynth

Windows: choco install fluidsynth 或下载预编译版本

MP3 需要 ffmpeg

macOS: brew install ffmpeg

Ubuntu/Debian: sudo apt-get install ffmpeg

Windows: 安装 ffmpeg 并把 ffmpeg.exe 加到 PATH

准备一个通用 SoundFont（.sf2），例如 “GeneralUser GS” 或 “Chorium”. 将路径填到 sf2_path

文本格式（简单自定义，可直接用）：支持

单音：C4:1 表示 C4 1 拍

升降号：C#4:0.5, Db4:0.5

休止：R:1

和弦：C4+E4+G4:2

默认八度为 4（如 C:1 等同 C4:1），默认时值 1 拍；拍速由 tempo 控制

代码

# -*- coding: utf-8 -*-
# 依赖: pretty_midi, pyfluidsynth, soundfile, pydub, numpy
import re
import numpy as np
import pretty_midi
import soundfile as sf

def parse_text_score(text):
    """
    Parse a plain-text score into a list of timed events.

    Supported syntax:
      - Single note: C4:1, D#5:0.5, F:2 (octave defaults to 4)
      - Rest: R:1
      - Chord: C4+E4+G4:2
      - Separators: whitespace / comma / vertical bar
      - Default duration: 1 beat (when ":<beats>" is omitted)

    Returns:
        A list of dicts such as
        [{'notes': ['C4', 'E4'], 'beats': 2.0}, {'notes': [], 'beats': 1.0}, ...].
        A rest is represented by an empty 'notes' list.

    Raises:
        ValueError: if a note symbol cannot be parsed, or if a duration is
            not a positive, finite number.
    """
    events = []
    for tk in re.split(r'[\s,|]+', text.strip()):
        if not tk:
            continue

        left, sep, dur_str = tk.partition(':')
        if sep:
            try:
                beats = float(dur_str)
            except ValueError:
                # Covers an empty duration ("C4:") and non-numeric text.
                raise ValueError(f"无法解析时值: {tk}") from None
            # Rejects zero, negative, inf and NaN durations in one check
            # (NaN fails every comparison).
            if not 0 < beats < float('inf'):
                raise ValueError(f"时值必须为正数: {tk}")
        else:
            beats = 1.0

        if left.upper() == 'R':  # rest
            events.append({'notes': [], 'beats': beats})
            continue

        notes = []
        for s in left.split('+'):
            m = re.fullmatch(r'([A-Ga-g])([#b]?)(\d?)', s)
            if not m:
                raise ValueError(f"无法解析音符: {s}")
            letter, acc, octv = m.groups()
            # A missing octave digit falls back to octave 4.
            notes.append(f"{letter.upper()}{acc}{octv or '4'}")
        events.append({'notes': notes, 'beats': beats})
    return events

def text_to_pretty_midi(score_text, tempo=120, instrument_name='Acoustic Grand Piano'):
    """
    Build a single-instrument PrettyMIDI object from a text score.

    Args:
        score_text: Score in the format accepted by parse_text_score.
        tempo: Tempo in beats per minute; used both as the MIDI initial
            tempo and to convert beats into seconds.
        instrument_name: Instrument name passed to
            pretty_midi.instrument_name_to_program. If the lookup fails,
            program 0 is used instead.

    Returns:
        A pretty_midi.PrettyMIDI object holding one instrument whose notes
        are laid out back to back; chord notes share the same start/end.
    """
    events = parse_text_score(score_text)
    pm = pretty_midi.PrettyMIDI(initial_tempo=tempo)
    try:
        program = pretty_midi.instrument_name_to_program(instrument_name)
    except Exception:
        # Best-effort fallback to program 0 (Grand Piano). Catching Exception
        # rather than using a bare except lets KeyboardInterrupt/SystemExit
        # propagate.
        program = 0
    inst = pretty_midi.Instrument(program=program)

    t = 0.0  # running start time of the current event, in seconds
    sec_per_beat = 60.0 / tempo
    for ev in events:
        dur = ev['beats'] * sec_per_beat
        # A rest has no notes, so it only advances the clock.
        for note_name in ev['notes']:
            pitch = pretty_midi.note_name_to_number(note_name)
            inst.notes.append(pretty_midi.Note(
                velocity=96, pitch=pitch, start=t, end=t + dur
            ))
        t += dur

    pm.instruments.append(inst)
    return pm

if __name__ == "__main__":
    # Sample rate shared by the synthesizer call and the WAV writer.
    sample_rate = 44100

    # Step 1: the text score to render (replace with your own).
    score_text = "C4:1 D4:1 E4:2 | R:1 | C4+E4+G4:2 G4:1 F4:1"

    # Step 2: build the MIDI object and write it to disk.
    midi = text_to_pretty_midi(
        score_text,
        tempo=100,
        instrument_name='Acoustic Grand Piano',
    )
    midi.write("song.mid")
    print("已写出: song.mid")

    # Step 3: render to WAV (needs pyfluidsynth, an installed FluidSynth,
    # and a .sf2 SoundFont).
    soundfont = "/path/to/your/GeneralUser_GS.sf2"  # <-- point this at your .sf2 file
    samples = midi.fluidsynth(sf2_path=soundfont, fs=sample_rate)
    sf.write("song.wav", samples, sample_rate)
    print("已写出: song.wav")

    # Step 4 (optional): convert to MP3 (needs ffmpeg). Any failure here,
    # including a missing pydub, is reported and skipped.
    try:
        from pydub import AudioSegment
        wav_segment = AudioSegment.from_wav("song.wav")
        wav_segment.export("song.mp3", format="mp3", bitrate="192k")
        print("已写出: song.mp3")
    except Exception as e:
        print("MP3 导出跳过（需要安装 pydub + ffmpeg）:", e)

方法 B：不装合成器，直接用正弦波合成 WAV（快速 demo）

# -*- coding: utf-8 -*-
import re
import numpy as np
import soundfile as sf

def parse_text_score_simple(text):
    """
    Parse a plain-text score into a list of timed events.

    Tokens are separated by whitespace, commas or vertical bars. Each token
    is "<notes>[:<beats>]", where <notes> is "R" for a rest, a single note
    such as C4 / D#5 / F (octave defaults to 4), or a chord joined with "+"
    such as C4+E4+G4. A missing ":<beats>" means 1 beat.

    Returns:
        A list of dicts {'notes': [...], 'beats': float}; a rest has an
        empty 'notes' list.

    Raises:
        ValueError: if a note symbol cannot be parsed, or if a duration is
            not a positive, finite number.
    """
    events = []
    for tk in re.split(r'[\s,|]+', text.strip()):
        if not tk:
            continue

        left, sep, dur_str = tk.partition(':')
        if sep:
            try:
                beats = float(dur_str)
            except ValueError:
                # Covers an empty duration ("C4:") and non-numeric text.
                raise ValueError(f"无法解析时值: {tk}") from None
            # Rejects zero, negative, inf and NaN durations in one check
            # (NaN fails every comparison).
            if not 0 < beats < float('inf'):
                raise ValueError(f"时值必须为正数: {tk}")
        else:
            beats = 1.0

        if left.upper() == 'R':  # rest
            events.append({'notes': [], 'beats': beats})
            continue

        notes = []
        for s in left.split('+'):
            m = re.fullmatch(r'([A-Ga-g])([#b]?)(\d?)', s)
            if not m:
                raise ValueError(f"无法解析音符: {s}")
            letter, acc, octv = m.groups()
            # A missing octave digit falls back to octave 4.
            notes.append(f"{letter.upper()}{acc}{octv or '4'}")
        events.append({'notes': notes, 'beats': beats})
    return events

# Semitone offset of each pitch-class spelling above C.
SEMITONES = {
    'C': 0, 'C#': 1, 'Db': 1, 'D': 2, 'D#': 3, 'Eb': 3, 'E': 4, 'F': 5,
    'F#': 6, 'Gb': 6, 'G': 7, 'G#': 8, 'Ab': 8, 'A': 9, 'A#': 10, 'Bb': 10,
    'B': 11,
}


def note_to_midi(n):
    """
    Convert a note name such as 'C4', 'F#3' or 'Bb5' to a MIDI note number.

    The name is an uppercase letter A-G, an optional '#' or 'b', and an
    octave number, with C4 mapping to 60.

    Raises:
        ValueError: if the name does not match that pattern.
    """
    m = re.fullmatch(r'([A-G])([#b]?)(\d+)', n)
    if not m:
        raise ValueError(f"无法解析音符: {n}")
    letter, acc, octv = m.groups()
    # Start from the natural letter and shift by the accidental, so that
    # spellings absent from SEMITONES (Cb, E#, Fb, B#) resolve as well;
    # Cb4 lands one semitone below C4 and B#4 one above B4.
    semitone = SEMITONES[letter] + {'': 0, '#': 1, 'b': -1}[acc]
    return 12 * (int(octv) + 1) + semitone  # C4=60

def synth_sine(score_text, tempo=120, fs=44100, amp=0.2, attack=0.01, release=0.05):
    """
    Render a text score to a mono float32 waveform using summed sine waves.

    Args:
        score_text: Score in the format accepted by parse_text_score_simple.
        tempo: Tempo in beats per minute.
        fs: Sample rate in Hz.
        amp: Amplitude factor applied to every note or chord.
        attack: Length of the linear fade-in, in seconds.
        release: Length of the linear fade-out, in seconds.

    Returns:
        A 1-D float32 numpy array with all events concatenated; an empty
        array when the score contains no events.
    """
    evs = parse_text_score_simple(score_text)
    sec_per_beat = 60.0 / tempo
    attack_len = int(fs * attack)
    release_len = int(fs * release)

    audio = []
    for ev in evs:
        dur = ev['beats'] * sec_per_beat
        n_samp = max(1, int(round(dur * fs)))
        if not ev['notes']:  # rest
            audio.append(np.zeros(n_samp, dtype=np.float32))
            continue

        # Simple mix: several notes in one event sound together as a chord.
        mix = np.zeros(n_samp, dtype=np.float32)
        t = np.arange(n_samp) / fs
        for note in ev['notes']:
            midi = note_to_midi(note)
            freq = 440.0 * (2 ** ((midi - 69) / 12.0))
            mix += np.sin(2 * np.pi * freq * t)
        mix /= max(1, len(ev['notes']))  # keep chords from clipping

        # Attack/release envelope. Ramps are clamped to the note length so a
        # note shorter than a ramp no longer triggers a shape mismatch, and
        # they are multiplied in so overlapping ramps combine instead of the
        # release overwriting the attack.
        env = np.ones(n_samp, dtype=np.float32)
        a = min(attack_len, n_samp)
        r = min(release_len, n_samp)
        if a > 0:
            env[:a] *= np.linspace(0, 1, a, endpoint=False)
        if r > 0:
            env[-r:] *= np.linspace(1, 0, r, endpoint=False)
        audio.append((amp * mix * env).astype(np.float32))

    if not audio:
        # np.concatenate raises on an empty list; return silence of length 0.
        return np.zeros(0, dtype=np.float32)
    return np.concatenate(audio)

if __name__ == "__main__":
    # Render a short demo score with the sine synthesizer and save it as WAV.
    score = "C4:1 D4:1 E4:2 | R:1 | C4+E4+G4:2"
    y = synth_sine(score, tempo=100, fs=44100)
    sf.write("demo_sine.wav", y, 44100)
    print("已写出: demo_sine.wav")

优点：不需要安装 FluidSynth

缺点：只有简单波形，音色单一

另外，仍然可以同时导出 MIDI（用方法 A 代码中的 pm.write）

如果你的“曲子文本”是现成记谱格式

ABC notation / MusicXML / TinyNotation：用 music21 直接解析并导出 MIDI，随后再转 WAV/MP3

示例（TinyNotation）：

# pip install music21
from music21 import converter, instrument
# Parse a TinyNotation string into a music21 object.
tiny_stream = converter.parse('tinynotation: 4/4 c4 d e f g1')
# Insert a Piano instrument object at offset 0.
tiny_stream.insert(0, instrument.Piano())
# Export the result as a MIDI file.
tiny_stream.write('midi', fp='tiny.mid')
# 然后用上面的渲染步骤把 tiny.mid 转成 WAV/MP3

方法 C：基于 AI 的描述性文本生成

这种方法利用了先进的AI模型，如Google的MusicLM或Meta的MusicGen。它们可以直接从文本描述生成音频。使用这种方法通常依赖于预训练好的模型，最方便的途径是通过 Hugging Face 的 transformers 库。

需要安装的库

pip install transformers torch scipy

# 可能还需要安装 accelerate 以加速

pip install accelerate

示例代码：使用 MusicGen 模型

MusicGen 是一个强大的文本到音乐生成模型。

import scipy
from transformers import pipeline

def generate_music_from_prompt(prompt, output_filename="ai_music.wav", duration=10):
    """
    Generate music from a text description with a Hugging Face MusicGen
    model and save it as a WAV file.

    Args:
        prompt: Text description of the music to generate.
        output_filename: Path of the WAV file to write.
        duration: Requested length in seconds; converted to a token budget
            of 256 tokens per 5 seconds.
    """
    # Import the submodule explicitly: a bare `import scipy` does not load
    # `scipy.io.wavfile` on every SciPy version.
    from scipy.io import wavfile

    # Load the model; it is downloaded automatically, so the first run is slow.
    # 'facebook/musicgen-small' is a small model: fast, with acceptable quality.
    # 'facebook/musicgen-medium' or 'facebook/musicgen-large' sound better but
    # need more resources.
    synthesiser = pipeline("text-to-audio", model="facebook/musicgen-small")

    print("AI正在生成音乐，请稍候...")
    # max_new_tokens controls the length of the generated audio; keep it at
    # least 1 so a very small duration does not request zero tokens.
    max_new_tokens = max(1, int(duration * 256 / 5))
    music = synthesiser(prompt, forward_params={"max_new_tokens": max_new_tokens})

    # Sampling rate and audio samples returned by the pipeline.
    sampling_rate = music["sampling_rate"]
    audio_data = music["audio"]

    # Save as WAV.
    # NOTE(review): assumes audio_data[0] is the sample array for a single
    # clip — confirm against the pipeline's output shape.
    wavfile.write(output_filename, rate=sampling_rate, data=audio_data[0])
    print(f"AI生成的音乐已保存为: {output_filename}")

# --- Main program ---
# Guarded so that importing this module does not download the model and
# start generating audio.
if __name__ == "__main__":
    # Text description of the music to generate
    music_prompt = "A sad lofi hip hop beat for studying, with soft piano and rain sounds"
    # Alternative prompts:
    # music_prompt = "80s electronic synthwave, driving in a futuristic city at night"
    # music_prompt = "一段舒缓的中国古典古筝独奏"

    # Generate 15 seconds of music
    generate_music_from_prompt(music_prompt, duration=15)

# 如果需要转成mp3，可以复用上面的 pydub 方法
# from pydub import AudioSegment
# sound = AudioSegment.from_wav("ai_music.wav")
# sound.export("ai_music.mp3", format="mp3")

注意：AI模型很大，第一次运行 pipeline 时会下载模型文件（几百MB到几GB不等），需要耐心等待。AI生成音乐需要较多的计算资源（CPU也可以，但有GPU会快很多）。生成的结果是随机的，每次运行同一个prompt可能会得到不同的音乐。这种方法直接生成WAV音频，跳过了MIDI步骤。

回答

开发了一个网站ai聊天助手

一个月开发一套类似coze的智能体平台

部署一套内网离线ai助理

私有ai助理开发

类似如家的租房app开发

h5手机端考试网站开发

开发一个短剧解锁剧集的小程序

我要开发一个酒类拍卖交易平台

开发艺术品拍卖收藏买画卖画h5网站

帮我做个数字货币交易所网站

有没有免费让ai自动帮你接管操作电脑的mcp服务？

mcp为啥用Streamable HTTP 替代 HTTP + SSE？

scratchjr有没有开源的前端html网页版本源代码？

多模态大模型能否根据ui交互视频来模仿写出前端交互动画效果ui代码？

如何用阿里云oss+函数计算fc+事件总线EventBridge+消息队列+数据库+redis缓存打造一个高并发弹性系统？

阿里云函数计算 FC如何在海外节点搭建一个代理网络？

ai studio中gemini build的代码如何发布到github pages等免费网页托管上？

如何在cursor、qoder、trae中使用Claude Skills功能？

有没有不用u盘就能重装系统的开源工具？

python如何固定摄像头实时计算停车场停车位剩余数量？