最近把CSDN上的部分博客迁移到了个人博客上。CSDN使用的是传统的富文本编辑器,而个人博客使用的是Markdown,所以下面分享一下两者之间的转换方法。
npm install clean-mark --global
该工具可以将我们的博客做一个初步的转markdown,用法如下
clean-mark "https://blog.csdn.net/yyy/article/details/xxx"
双引号内部为某篇具体需要转换的博文链接,转换成功后在当前目录下会出现xxx.md
使用clean-mark工具转换得到的md目前有两个问题:一是文中的图片仍然是带水印参数的CSDN外链;二是部分字符以十六进制HTML实体(形如 &amp;#x4e2d;)的形式保留下来,显示为乱码。下面的Java程序用于修正这两个问题:
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Post-processes Markdown files that clean-mark generated from CSDN articles.
 *
 * <p>For every {@code .md} file directly under {@link #fileRoot} the tool:
 * <ol>
 *   <li>reads the {@code title: } entry from the front matter (the section between the
 *       first two {@code ---} lines) and uses it as the output file name;</li>
 *   <li>downloads every image referenced as {@code ![](url)} without the query string
 *       (the watermark parameters) into {@code fileRoot\img\};</li>
 *   <li>rewrites the image links to point below {@link #artImgFilePath};</li>
 *   <li>decodes hexadecimal HTML character references such as {@code &#x4e2d;};</li>
 *   <li>writes the article body (front matter removed) to {@code fileRoot\tomd\}.</li>
 * </ol>
 */
public class CsdnHandle {

    /** Line that opens and closes the front matter block. */
    private static final String FRONT_MATTER_DELIMITER = "---";

    /** Front matter key that carries the article title. */
    private static final String TITLE_KEY = "title: ";

    /** Matches a Markdown image with empty alt text; group 1 is the link target. */
    private static final Pattern IMAGE_LINK = Pattern.compile("!\\[]\\((.*?)\\)");

    /** Matches a hexadecimal HTML character reference; group 1 holds the hex digits. */
    private static final Pattern HEX_ENTITY = Pattern.compile("&#x([0-9a-fA-F]{1,6});");

    /** Connect timeout for image downloads, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 3 * 1000;

    /** Read timeout for image downloads, in milliseconds, so a stalled server cannot hang the run. */
    private static final int READ_TIMEOUT_MS = 10 * 1000;

    /** Folder that contains the Markdown files to be fixed. */
    private final String fileRoot = "H:\\CSDN";

    /** Root path that local image links inside the generated Markdown point to. */
    private final String artImgFilePath = "file://D:/Program Files/Gridea/post-images/";

    public static void main(String[] args) {
        CsdnHandle handle = new CsdnHandle();
        handle.process();
    }

    /**
     * Converts every {@code .md} file found directly inside {@link #fileRoot}.
     *
     * <p>Downloaded images go to {@code fileRoot\img\} and converted files to
     * {@code fileRoot\tomd\}; both folders are created when missing.
     */
    public void process() {
        String imgRoot = fileRoot + "\\img\\"; // downloaded, watermark-free images
        String tomd = fileRoot + "\\tomd\\";   // converted Markdown files
        ensureDirectory(imgRoot);
        ensureDirectory(tomd);

        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] fileArray = new File(fileRoot).listFiles();
        if (fileArray == null) {
            System.err.println("Cannot list directory: " + fileRoot);
            return;
        }
        for (File candidate : fileArray) {
            // Check the file name's extension instead of searching the whole absolute path.
            if (!candidate.isDirectory() && candidate.getName().endsWith(".md")) {
                conversionMd(candidate, imgRoot, tomd);
                System.out.println(candidate.getName() + " ok");
            }
        }
    }

    /**
     * Converts a single clean-mark Markdown file.
     *
     * @param file        the source Markdown file
     * @param imgRootPath folder (with trailing separator) that receives downloaded images
     * @param toMdPath    folder (with trailing separator) that receives the converted file
     */
    public void conversionMd(File file, String imgRootPath, String toMdPath) {
        List<String> allLines;
        try {
            allLines = Files.readAllLines(Paths.get(file.getAbsolutePath()));
        } catch (IOException e) {
            // Nothing can be converted without the source content.
            e.printStackTrace();
            return;
        }

        int head = 0;       // number of front matter delimiters seen so far (0, 1 or 2)
        String title = null;
        int imgId = 1;      // running number that makes image file names unique per article
        BufferedWriter bw = null;
        try {
            for (String line : allLines) {
                // Only the first two "---" lines delimit the front matter; later ones are
                // Markdown horizontal rules and belong to the article body.
                if (head < 2 && line.equals(FRONT_MATTER_DELIMITER)) {
                    head++;
                    continue;
                }
                if (head == 1) {
                    // Open the output once, on the first title entry.
                    if (bw == null && line.startsWith(TITLE_KEY)) {
                        title = line.substring(TITLE_KEY.length());
                        title = title.replace(" ", "之");
                        title = title.replace("-", "之");
                        bw = openWriter(new File(toMdPath + title + ".md"));
                    }
                    continue;
                }
                if (head == 2 && bw != null) {
                    bw.write(unescape(localizeImages(line, imgId, title, imgRootPath)));
                    bw.newLine();
                    imgId += countImageLinks(line);
                }
            }
            if (bw == null) {
                System.err.println("No output written for " + file.getName()
                        + ": front matter title not found or output file could not be opened");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (bw != null) {
                try {
                    bw.close(); // also closes the underlying file stream
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Downloads an image to the local disk.
     *
     * @param imgId    running image number inside the article
     * @param title    article title, used as file name prefix
     * @param filePath folder (with trailing separator) that receives the image
     * @param http     image URL; everything from the first {@code ?} on is dropped
     * @return the local link ({@link #artImgFilePath} + file name) when the download
     *         succeeded, otherwise {@code http} unchanged so the article keeps a working link
     */
    public String linkSaveImg(int imgId, String title, String filePath, String http) {
        int queryStart = http.indexOf('?');
        String address = queryStart >= 0 ? http.substring(0, queryStart) : http;
        String fileName = title + "_" + imgId + extensionOf(address);

        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(address).openConnection();
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(READ_TIMEOUT_MS);
            // Pretend to be a browser so the server does not answer 403 to a crawler.
            conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");

            byte[] data;
            try (InputStream inputStream = conn.getInputStream()) {
                data = readInputStream(inputStream);
            }
            try (FileOutputStream fos = new FileOutputStream(new File(filePath + fileName))) {
                fos.write(data);
            }
            return artImgFilePath + fileName;
        } catch (IOException | RuntimeException e) {
            // Best effort: one broken image must not abort the whole conversion.
            System.err.println("Failed to download " + address + ": " + e);
            return http;
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Reads a stream to its end.
     *
     * @param inputStream the stream to drain; it is not closed by this method
     * @return all bytes read from the stream
     * @throws IOException if reading fails
     */
    public byte[] readInputStream(InputStream inputStream) throws IOException {
        byte[] buffer = new byte[1024];
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int len;
        while ((len = inputStream.read(buffer)) != -1) {
            bos.write(buffer, 0, len);
        }
        return bos.toByteArray();
    }

    /**
     * Replaces hexadecimal HTML character references ({@code &#x4e2d;}) by the characters
     * they denote, which fixes the garbled text left behind by the conversion.
     *
     * <p>Malformed or unterminated references are left untouched.
     *
     * @param src the text to decode, may be {@code null}
     * @return the decoded text, or {@code src} itself when there is nothing to decode
     */
    public static String unescape(String src) {
        if (src == null || src.indexOf("&#x") == -1) {
            return src;
        }
        Matcher matcher = HEX_ENTITY.matcher(src);
        StringBuffer decoded = new StringBuffer(src.length());
        while (matcher.find()) {
            int codePoint = Integer.parseInt(matcher.group(1), 16);
            String replacement = Character.isValidCodePoint(codePoint)
                    ? new String(Character.toChars(codePoint))
                    : matcher.group();
            // quoteReplacement keeps '$' and '\' in the decoded text literal.
            matcher.appendReplacement(decoded, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(decoded);
        return decoded.toString();
    }

    /** Creates the directory (including parents) when it does not exist yet. */
    private static void ensureDirectory(String path) {
        File dir = new File(path);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    /** Opens a UTF-8 writer for the target file; returns {@code null} when it cannot be opened. */
    private static BufferedWriter openWriter(File target) {
        try {
            return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(target), "utf-8"));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Counts the non-empty image links in a line, i.e. the image numbers the line consumes. */
    private static int countImageLinks(String line) {
        int count = 0;
        Matcher matcher = IMAGE_LINK.matcher(line);
        while (matcher.find()) {
            if (!matcher.group(1).isEmpty()) {
                count++;
            }
        }
        return count;
    }

    /** Downloads every image referenced in the line and rewrites its link; numbering starts at firstImgId. */
    private String localizeImages(String line, int firstImgId, String title, String imgRootPath) {
        String rewritten = line;
        int imgId = firstImgId;
        Matcher matcher = IMAGE_LINK.matcher(line);
        while (matcher.find()) {
            String httpLink = matcher.group(1);
            if (httpLink.isEmpty()) {
                continue; // "![]()" has nothing to download, and replacing "" would corrupt the line
            }
            String path = linkSaveImg(imgId, title, imgRootPath, httpLink);
            rewritten = rewritten.replace(httpLink, path);
            imgId++;
        }
        return rewritten;
    }

    /** Returns the file extension (including the dot) of the URL's last path segment, or "" if none. */
    private static String extensionOf(String address) {
        int lastSlash = address.lastIndexOf('/');
        int lastDot = address.lastIndexOf('.');
        return lastDot > lastSlash ? address.substring(lastDot) : "";
    }
}
使用Java编写这个程序的主要原因是网络相关操作真香!

打开该链接后会发现是一个404页面,没错,不用怀疑自己操作错了。
在该404页面按F12打开开发者工具,并选择控制台(Console)。
输入如下代码回车
// Append a <script> element to the page body and point it at the csdn-move bundle on the jsDelivr CDN.
var s=document.createElement('script');s.type='text/javascript';document.body.appendChild(s);s.src='//cdn.jsdelivr.net/gh/ame-yu/csdn-move@latest/dist/index.js';

稍等片刻便会打包下载一个zip文件,里面包含所有的md文章。文章数越多,等待时间越长,可将鼠标悬停在浏览器标签页上查看打包进度。

下一篇:【MyBatis】学生表格操作