diff --git a/webhook.js b/webhook.js
index d382bae..2b63e7c 100755
--- a/webhook.js
+++ b/webhook.js
@@ -364,6 +364,37 @@
                   <CreateTime>${timestamp}</CreateTime>
                   <MsgType><![CDATA[image]]></MsgType>
                   <Image><MediaId><![CDATA[${contentOrMediaId}]]></MediaId></Image>`;
+    } else if (msgType === "voice") {
+      innerXml = `<ToUserName><![CDATA[${toUser}]]></ToUserName>
+                  <FromUserName><![CDATA[${fromUser}]]></FromUserName>
+                  <CreateTime>${timestamp}</CreateTime>
+                  <MsgType><![CDATA[voice]]></MsgType>
+                  <Voice><MediaId><![CDATA[${contentOrMediaId}]]></MediaId></Voice>`;
+    } else if (msgType === "video") {
+      innerXml = `<ToUserName><![CDATA[${toUser}]]></ToUserName>
+                  <FromUserName><![CDATA[${fromUser}]]></FromUserName>
+                  <CreateTime>${timestamp}</CreateTime>
+                  <MsgType><![CDATA[video]]></MsgType>
+                  <Video>
+                    <MediaId><![CDATA[${contentOrMediaId}]]></MediaId>
+                    <Title><![CDATA[${options.title || ""}]]></Title>
+                    <Description><![CDATA[${options.description || ""}]]></Description>
+                  </Video>`;
+    } else if (msgType === "news") {
+      const articles = options.articles || [];
+      const articlesXml = articles.map(item => `
+        <item>
+          <Title><![CDATA[${item.title || ""}]]></Title>
+          <Description><![CDATA[${item.description || ""}]]></Description>
+          <PicUrl><![CDATA[${item.picUrl || ""}]]></PicUrl>
+          <Url><![CDATA[${item.url || ""}]]></Url>
+        </item>`).join("");
+      innerXml = `<ToUserName><![CDATA[${toUser}]]></ToUserName>
+                  <FromUserName><![CDATA[${fromUser}]]></FromUserName>
+                  <CreateTime>${timestamp}</CreateTime>
+                  <MsgType><![CDATA[news]]></MsgType>
+                  <ArticleCount>${articles.length}</ArticleCount>
+                  <Articles>${articlesXml}</Articles>`;
     } else {
       logger.warn("Unsupported message type for passive XML reply", { msgType });
       return null;
diff --git a/wecom-api.js b/wecom-api.js
index ffd66d5..c47dd42 100755
--- a/wecom-api.js
+++ b/wecom-api.js
@@ -34,6 +34,33 @@
     return data.access_token;
   }
 
+  /**
+   * Uploads a local file to WeCom as temporary media (cgi-bin/media/upload).
+   * @param {string} corpId - Corp ID used to obtain the access token.
+   * @param {string} corpSecret - Secret used to obtain the access token.
+   * @param {string} type - Value sent as the `type` query parameter.
+   * @param {string} filePath - Path of the file to read and upload.
+   * @param {string} [fileName] - Multipart filename; defaults to the base name of filePath.
+   * @returns {Promise} Resolves to the parsed response body (includes media_id).
+   * @throws {Error} When WeCom reports a non-zero errcode; readFile/fetch rejections propagate.
+   */
+  async uploadMedia(corpId, corpSecret, type, filePath, fileName) {
+    const [{ readFile }, { basename }] = await Promise.all([import("fs/promises"), import("path")]);
+    logger.debug("WeCom API: Uploading media", { corpId, type, filePath });
+    const token = await this.getAccessToken(corpId, corpSecret);
+    const formData = new FormData();
+    formData.append('media', new Blob([await readFile(filePath)]), fileName || basename(filePath) || 'file');
+    const query = new URLSearchParams({ access_token: token, type }); // percent-encodes both values
+    const res = await fetch(`https://qyapi.weixin.qq.com/cgi-bin/media/upload?${query}`, { method: "POST", body: formData });
+    const data = await res.json();
+    if (data.errcode !== 0) {
+      logger.error("WeCom API: media/upload failed", { error: data.errmsg, code: data.errcode, corpId, type, filePath });
+      throw new Error(`WeCom media/upload failed: ${data.errmsg} (${data.errcode})`);
+    }
+    logger.info("WeCom API: Media uploaded successfully", { mediaId: data.media_id, type });
+    return data; // contains media_id, type, created_at
+  }
+
   async sendTextMessage(corpId, corpSecret, agentId, toUser, text) {
     logger.debug("WeCom API: Sending async text message", { corpId, agentId, toUser, textPreview: text.substring(0, 50) });
     const token = await this.getAccessToken(corpId, corpSecret);
diff --git a/wecom-message-processor.js b/wecom-message-processor.js
index ccf9de3..37f7c60 100755
--- a/wecom-message-processor.js
+++ b/wecom-message-processor.js
@@ -634,6 +634,14 @@
   }
 }
 
+/** Maps a file path's extension to a media type string: 'image', 'voice', 'video', or 'file' (fallback). */
+function getWecomMediaType(filePath) {
+  const suffix = filePath.split('.').pop().toLowerCase();
+  const extsByType = { image: ['jpg', 'jpeg', 'png'], voice: ['mp3', 'wav', 'amr', 'm4a'], video: ['mp4'] };
+  const hit = Object.entries(extsByType).find(([, exts]) => exts.includes(suffix));
+  return hit ? hit[0] : 'file';
+}
+
 export async function deliverWecomReply({ payload, senderId, streamId, isSelfBuiltAppRequest, originalMessage, account }) {
   logger.info('deliverWecomReply received payload', { payload: JSON.stringify(payload) });
   const text = payload.text || "";
@@ -665,20 +673,53 @@
   }
 
   let processedText = text;
+  if (isSelfBuiltAppRequest && mediaMatches.length === 1 && !text.replace(mediaMatches[0].fullMatch, "").trim()) {
+    // Single media item, no other text. Use passive media reply.
+    const media = mediaMatches[0];
+    const type = getWecomMediaType(media.path);
+    try {
+      logger.info("WeCom Self-Built: uploading single media for passive reply", { path: media.path, type });
+      const uploadRes = await wecomApi.uploadMedia(account.corpId, account.corpSecret, type, media.path);
+      const mediaId = uploadRes.media_id;
+      
+      const webhook = new WecomWebhook({ token: account.token, encodingAesKey: account.encodingAesKey });
+      const passiveReply = webhook.buildPassiveReplyXml(
+          originalMessage.fromUser || originalMessage.chatId || originalMessage.ToUserName, 
+          originalMessage.ToUserName,
+          type,
+          mediaId,
+          Math.floor(Date.now() / 1000),
+          originalMessage.query.nonce,
+      );
+      logger.info(`WeCom Self-Built: constructed passive ${type} XML reply`, { mediaId });
+      return { passiveReplyXml: passiveReply };
+    } catch (err) {
+      logger.error("Failed to upload media for passive reply", { error: err.message, path: media.path });
+      // Fallback to text placeholder logic below
+    }
+  }
+
   if (mediaMatches.length > 0) {
     for (const media of mediaMatches) {
       if (streamId && !isSelfBuiltAppRequest) {
-          const queued = streamManager.queueImage(streamId, media.path);
-          if (queued) {
-            processedText = processedText.replace(media.fullMatch, "").trim();
-            logger.info("Queued absolute path image for stream (AI Bot)", {
-              streamId,
-              imagePath: media.path,
-            });
+          const type = getWecomMediaType(media.path);
+          if (type === 'image') {
+              const queued = streamManager.queueImage(streamId, media.path);
+              if (queued) {
+                processedText = processedText.replace(media.fullMatch, "").trim();
+                logger.info("Queued absolute path image for stream (AI Bot)", {
+                  streamId,
+                  imagePath: media.path,
+                });
+              }
+          } else {
+              processedText = processedText.replace(media.fullMatch, `[${type}: ${media.path}]`).trim();
+              logger.warn("WeCom AI Bot: non-image media in stream, converting to text", { type, mediaPath: media.path });
           }
       } else if (isSelfBuiltAppRequest) {
-          processedText = processedText.replace(media.fullMatch, `[图片: ${media.path}]`).trim();
-          logger.warn("WeCom Self-Built: converting local media to text for passive reply", { mediaPath: media.path });
+          const type = getWecomMediaType(media.path);
+          processedText = processedText.replace(media.fullMatch, `[${type === 'voice' ? '语音' : type === 'video' ? '视频' : type === 'image' ? '图片' : '文件'}: ${media.path}]`).trim();
+          logger.warn("WeCom Self-Built: converting local media to text for passive reply (multi-item or text mixed)", { mediaPath: media.path, type });
       }
     }
   }