Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pip install -r requirements.txt
To convert the sample files, follow the steps below:

```bash
python markdown_to_word_converter.py
python md2docx_python.py
Enter the path to the Markdown file (e.g., README.md): ..\sample_files\amazon_case_study.md
Enter the path for the output Word file (e.g., README.docx): ..\sample_files\amazon_case_study.docx
```
Expand Down
Empty file.
File renamed without changes.
48 changes: 48 additions & 0 deletions build/lib/md2docx_python/src/md2docx_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import markdown
from docx import Document
from bs4 import BeautifulSoup

def markdown_to_word(markdown_file, word_file):
    """Convert a Markdown file to a Word (.docx) document.

    The Markdown source is rendered to HTML with ``markdown.markdown`` and the
    top-level HTML elements are then mapped onto python-docx constructs:

    * ``h1``-``h6`` become headings of the matching level.
    * ``p`` becomes a paragraph; ``strong`` and ``em`` children become bold
      and italic runs, and any other inline tag contributes its plain text.
    * ``ul`` / ``ol`` items become 'List Bullet' / 'List Number' paragraphs.

    Any other top-level element is ignored.

    Args:
        markdown_file: Path of the Markdown file to read (opened as UTF-8).
        word_file: Path the resulting Word document is saved to.
    """
    # Reading the Markdown file
    with open(markdown_file, 'r', encoding='utf-8') as file:
        markdown_content = file.read()

    # Converting Markdown to HTML
    html_content = markdown.markdown(markdown_content)

    # Creating a new Word Document
    doc = Document()

    # Parsing the HTML so its top-level elements can be walked in order
    soup = BeautifulSoup(html_content, 'html.parser')

    heading_levels = {f'h{level}': level for level in range(1, 7)}
    list_styles = {'ul': 'List Bullet', 'ol': 'List Number'}

    # Adding content to the Word Document
    for element in soup:
        name = element.name
        if name in heading_levels:
            doc.add_heading(element.text, level=heading_levels[name])
        elif name == 'p':
            paragraph = doc.add_paragraph()
            for child in element.children:
                if child.name is None:
                    # Text node without a tag name: pass it on as a plain str.
                    paragraph.add_run(str(child))
                    continue
                # add_run() takes text, so hand it the tag's text content
                # instead of the Tag object itself (e.g. for <code> or <a>).
                run = paragraph.add_run(child.text)
                if child.name == 'strong':
                    run.bold = True
                elif child.name == 'em':
                    run.italic = True
        elif name in list_styles:
            # NOTE: find_all() searches all descendants, so items of nested
            # lists are emitted as list paragraphs as well.
            for li in element.find_all('li'):
                doc.add_paragraph(li.text, style=list_styles[name])

    doc.save(word_file)

if __name__ == "__main__":
    # Prompt for the paths only when executed as a script, so that importing
    # this module to reuse markdown_to_word() does not block on stdin.
    markdown_file = input("Enter the path to the Markdown file (e.g., README.md): ")
    word_file = input("Enter the path for the output Word file (e.g., README.docx): ")

    markdown_to_word(markdown_file, word_file)
1 change: 1 addition & 0 deletions build/lib/src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# This file marks the `src` directory as a Python package.
48 changes: 48 additions & 0 deletions build/lib/src/markdown_to_word_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import markdown
from docx import Document
from bs4 import BeautifulSoup

def markdown_to_word(markdown_file, word_file):
    """Convert a Markdown file to a Word (.docx) document.

    The Markdown source is rendered to HTML with ``markdown.markdown`` and the
    top-level HTML elements are then mapped onto python-docx constructs:

    * ``h1``-``h6`` become headings of the matching level.
    * ``p`` becomes a paragraph; ``strong`` and ``em`` children become bold
      and italic runs, and any other inline tag contributes its plain text.
    * ``ul`` / ``ol`` items become 'List Bullet' / 'List Number' paragraphs.

    Any other top-level element is ignored.

    Args:
        markdown_file: Path of the Markdown file to read (opened as UTF-8).
        word_file: Path the resulting Word document is saved to.
    """
    # Reading the Markdown file
    with open(markdown_file, 'r', encoding='utf-8') as file:
        markdown_content = file.read()

    # Converting Markdown to HTML
    html_content = markdown.markdown(markdown_content)

    # Creating a new Word Document
    doc = Document()

    # Parsing the HTML so its top-level elements can be walked in order
    soup = BeautifulSoup(html_content, 'html.parser')

    heading_levels = {f'h{level}': level for level in range(1, 7)}
    list_styles = {'ul': 'List Bullet', 'ol': 'List Number'}

    # Adding content to the Word Document
    for element in soup:
        name = element.name
        if name in heading_levels:
            doc.add_heading(element.text, level=heading_levels[name])
        elif name == 'p':
            paragraph = doc.add_paragraph()
            for child in element.children:
                if child.name is None:
                    # Text node without a tag name: pass it on as a plain str.
                    paragraph.add_run(str(child))
                    continue
                # add_run() takes text, so hand it the tag's text content
                # instead of the Tag object itself (e.g. for <code> or <a>).
                run = paragraph.add_run(child.text)
                if child.name == 'strong':
                    run.bold = True
                elif child.name == 'em':
                    run.italic = True
        elif name in list_styles:
            # NOTE: find_all() searches all descendants, so items of nested
            # lists are emitted as list paragraphs as well.
            for li in element.find_all('li'):
                doc.add_paragraph(li.text, style=list_styles[name])

    doc.save(word_file)

if __name__ == "__main__":
    # Prompt for the paths only when executed as a script, so that importing
    # this module to reuse markdown_to_word() does not block on stdin.
    markdown_file = input("Enter the path to the Markdown file (e.g., README.md): ")
    word_file = input("Enter the path for the output Word file (e.g., README.docx): ")

    markdown_to_word(markdown_file, word_file)
Binary file added dist/md2docx_python-python-0.2.0.tar.gz
Binary file not shown.
Binary file added dist/md2docx_python_python-0.2.0-py3-none-any.whl
Binary file not shown.
Empty file added md2docx_python/__init__.py
Empty file.
Binary file added md2docx_python/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
1 change: 1 addition & 0 deletions md2docx_python/src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# This file marks the `src` directory as a Python package.
48 changes: 48 additions & 0 deletions md2docx_python/src/md2docx_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import markdown
from docx import Document
from bs4 import BeautifulSoup

def markdown_to_word(markdown_file, word_file):
    """Convert a Markdown file to a Word (.docx) document.

    The Markdown source is rendered to HTML with ``markdown.markdown`` and the
    top-level HTML elements are then mapped onto python-docx constructs:

    * ``h1``-``h6`` become headings of the matching level.
    * ``p`` becomes a paragraph; ``strong`` and ``em`` children become bold
      and italic runs, and any other inline tag contributes its plain text.
    * ``ul`` / ``ol`` items become 'List Bullet' / 'List Number' paragraphs.

    Any other top-level element is ignored.

    Args:
        markdown_file: Path of the Markdown file to read (opened as UTF-8).
        word_file: Path the resulting Word document is saved to.
    """
    # Reading the Markdown file
    with open(markdown_file, 'r', encoding='utf-8') as file:
        markdown_content = file.read()

    # Converting Markdown to HTML
    html_content = markdown.markdown(markdown_content)

    # Creating a new Word Document
    doc = Document()

    # Parsing the HTML so its top-level elements can be walked in order
    soup = BeautifulSoup(html_content, 'html.parser')

    heading_levels = {f'h{level}': level for level in range(1, 7)}
    list_styles = {'ul': 'List Bullet', 'ol': 'List Number'}

    # Adding content to the Word Document
    for element in soup:
        name = element.name
        if name in heading_levels:
            doc.add_heading(element.text, level=heading_levels[name])
        elif name == 'p':
            paragraph = doc.add_paragraph()
            for child in element.children:
                if child.name is None:
                    # Text node without a tag name: pass it on as a plain str.
                    paragraph.add_run(str(child))
                    continue
                # add_run() takes text, so hand it the tag's text content
                # instead of the Tag object itself (e.g. for <code> or <a>).
                run = paragraph.add_run(child.text)
                if child.name == 'strong':
                    run.bold = True
                elif child.name == 'em':
                    run.italic = True
        elif name in list_styles:
            # NOTE: find_all() searches all descendants, so items of nested
            # lists are emitted as list paragraphs as well.
            for li in element.find_all('li'):
                doc.add_paragraph(li.text, style=list_styles[name])

    doc.save(word_file)

if __name__ == "__main__":
    # Prompt for the paths only when executed as a script, so that importing
    # this module to reuse markdown_to_word() does not block on stdin.
    markdown_file = input("Enter the path to the Markdown file (e.g., README.md): ")
    word_file = input("Enter the path for the output Word file (e.g., README.docx): ")

    markdown_to_word(markdown_file, word_file)
99 changes: 99 additions & 0 deletions md2docx_python_python.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
Metadata-Version: 2.1
Name: md2docx-python-python
Version: 0.1.0
Summary: Markdown to Word Converter.
Simple and straight forward Python utility
that converts a Markdown file (`.md`) to a Microsoft
Word document (`.docx`). It supports basic Markdown
elements, including headings, bold and italic text,
and both unordered and ordered lists.
Home-page: https://github.com/shloktech/md2docx-python
Author: Shlok Tadilkar
Author-email: shloktadilkar@gmail.com
License: MIT
Description: # Markdown to Word Converter

## Overview

A simple and straightforward Python utility that converts a Markdown file (`.md`) to a Microsoft Word document (`.docx`). It supports basic Markdown elements, including headings, bold and italic text, and both unordered and ordered lists.

#### Input .md file:
![image](https://github.com/user-attachments/assets/c2325e52-05a7-4e11-8f28-4eeb3d8c06f5)

#### Output .docx file:
![image](https://github.com/user-attachments/assets/3e48a9dd-8fe3-43cc-8246-164c58e95179)


## Features

- Converts Markdown headers (`#`, `##`, `###`) to Word document headings.
- Supports bold and italic text formatting.
- Converts unordered (`*`, `-`) and ordered (`1.`, `2.`) lists.
- Handles paragraphs with mixed content.

## Prerequisites

You need to have Python installed on your system along with the following libraries:

- `markdown` for converting Markdown to HTML.
- `python-docx` for creating and editing Word documents.
- `beautifulsoup4` for parsing HTML.

You can install the required libraries using pip:

```bash
pip install -r requirements.txt
```

To convert the sample files, follow the steps below:

```bash
python md2docx_python.py
Enter the path to the Markdown file (e.g., README.md): ..\sample_files\amazon_case_study.md
Enter the path for the output Word file (e.g., README.docx): ..\sample_files\amazon_case_study.docx
```

## Why this repo and not others?

Here are some reasons why this repo might be considered better or more suitable for certain use cases compared to other scripts available on the internet:

### 1. **Comprehensive Markdown Support**
- **Header Levels**: The script supports multiple header levels (`h1`, `h2`, `h3`), which is important for properly structuring the document.
- **Bold and Italic Text**: It handles bold (`**`) and italic (`*`) text, providing more accurate formatting in the Word document.

### 2. **Proper List Formatting**
- **Unordered and Ordered Lists**: The script correctly formats both unordered (`*`, `-`) and ordered lists (`1.`, `2.`) in the Word document. This ensures that lists appear as expected without additional line breaks or formatting issues.

### 3. **Use of Well-Supported Libraries**
- **Markdown to HTML Conversion**: Utilizes the `markdown` library, which is a widely used and reliable tool for converting Markdown to HTML.
- **HTML Parsing and Word Document Creation**: Employs `BeautifulSoup` for parsing HTML and `python-docx` for creating Word documents, both of which are robust and well-maintained libraries.

### 4. **Simplicity and Readability**
- **Clear Code Structure**: The script is designed to be straightforward and easy to understand, making it accessible for users who may want to customize or extend it.
- **Basic Markdown Elements**: Focuses on the most commonly used Markdown elements, ensuring compatibility with a wide range of Markdown files without unnecessary complexity.

### 5. **Customizability**
- **Easy to Modify**: Users can easily adjust the script to handle additional Markdown features or customize the output format based on their specific needs.
- **Example Usage**: Provides a clear example of how to use the script, making it easy for users to adapt it for their own files.

### 6. **Minimal Dependencies**
- **Lightweight and Focused**: The script relies on only a few libraries, which reduces potential conflicts and keeps the script lightweight.

### 7. **Handles Basic HTML Tags**
- **Text Formatting**: Properly handles bold and italic text by interpreting HTML tags (`strong`, `em`), ensuring that formatting is preserved when converting to Word.

### 8. **Privacy**
- If you work at a company and convert your Markdown files to Word with an online tool, there is a chance that the tool will store your files, which could leak vital company information. With this repo you can do the conversion entirely on your own system.

### Comparison to Other Scripts
- **Feature Set**: Some scripts may lack comprehensive support for Markdown features or may not handle lists and text formatting well.
- **Performance**: Depending on the implementation, performance might vary. This script is designed to be efficient for typical Markdown files.
- **User-Friendliness**: The clear and concise code in this script may make it more user-friendly and easier to modify compared to more complex alternatives.

Overall, this script provides a balanced combination of functionality, simplicity, and ease of use, which can be advantageous for many users looking to convert Markdown files to Word documents.

For any queries, please start a discussion; I will be happy to answer them :)

Platform: UNKNOWN
Requires-Python: >=3.9.0
Description-Content-Type: text/markdown
9 changes: 9 additions & 0 deletions md2docx_python_python.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
setup.py
md2docx_python/__init__.py
md2docx_python/src/__init__.py
md2docx_python/src/md2docx_python.py
md2docx_python_python.egg-info/PKG-INFO
md2docx_python_python.egg-info/SOURCES.txt
md2docx_python_python.egg-info/dependency_links.txt
md2docx_python_python.egg-info/requires.txt
md2docx_python_python.egg-info/top_level.txt
1 change: 1 addition & 0 deletions md2docx_python_python.egg-info/dependency_links.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

3 changes: 3 additions & 0 deletions md2docx_python_python.egg-info/requires.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
markdown
python-docx
beautifulsoup4
1 change: 1 addition & 0 deletions md2docx_python_python.egg-info/top_level.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
md2docx_python
6 changes: 6 additions & 0 deletions run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import os

# markdown_to_word lives in the package's src/md2docx_python.py module; the
# package-level __init__.py is empty and does not re-export it.
from md2docx_python.src.md2docx_python import markdown_to_word

# Build the paths with os.path.join instead of backslash literals: in a normal
# string "\a" is the BEL control character, which corrupted the file name.
sample_dir = os.path.join("md2docx_python-python", "sample_files")

# The Markdown file is the input and the .docx file is the output.
markdown_file = os.path.join(sample_dir, "amazon_case_study.md")
word_file = os.path.join(sample_dir, "amazon_case_study2.docx")

markdown_to_word(markdown_file, word_file)
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
long_description = f.read()

setup(
name='md2docx-python',
name='md2docx_python-python',
version='0.1.0',
url='https://github.com/shloktech/md2docx-python',
author='Shlok Tadilkar',
author_email='shloktadilkar@gmail.com',
license='MIT',
description="""Markdown to Word Converter.
Simple and straight forward Python utility
that converts a Markdown file (`.md`) to a Microsoft
Expand All @@ -21,6 +22,7 @@
long_description_content_type='text/markdown',
packages=find_packages(),
install_requires=['markdown', 'python-docx', 'beautifulsoup4'],
python_requires=">=3.9.0",
)


Binary file removed src/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
Binary file not shown.
Loading