Program to remove HTML tags from a given String
Last Updated :
21 Dec, 2022
Improve
Given a string str that contains some HTML tags, the task is to remove all the tags present in the given string str.
Examples:
Input: str = "<div><b>Geeks for Geeks</b></div>" Output: Geeks for Geeks
Input: str = "<a href="https://www.geeksforgeeks.org/">GFG</a>" Output: GFG
Approach:
The idea is to use Regular Expression to solve this problem. The following steps can be followed to compute the resultant string:
- Get the string.
- Every HTML tag is enclosed in angle brackets (<>). Therefore, use the regex-based replaceAll() function to replace every substring that starts with "<" and ends with ">" with an empty string.
- The function is used as:
String str; str = str.replaceAll("<[^>]*>", "");
Below is the implementation of the above approach:
// C++ program for the above approach
#include <iostream>
#include <regex>
using namespace std;
// Strips every HTML tag from the given text and prints what is left.
//
// A tag is any run that starts with '<' and extends to the nearest '>'.
// The stripped text is written to standard output with no trailing
// newline.
//
// s: text that may contain HTML tags; taken by value because it is
//    overwritten with the stripped result.
void RemoveHTMLTags(string s)
{
    // [^>]* is used instead of .*? because '.' in the default ECMAScript
    // grammar never matches a line terminator, so a tag broken across
    // lines would be left in place. The pattern is static so it is not
    // recompiled on every call.
    static const regex pattern("<[^>]*>");

    // Replace each tag with the empty string
    s = regex_replace(s, pattern, "");

    // Print string after removing tags
    cout << s;
}
// Driver Code
int main()
{
    // Sample markup whose tags are to be stripped
    const string html = "<div><b>Geeks for Geeks</b></div>";

    // Print the markup with every tag removed
    RemoveHTMLTags(html);
    return 0;
}
// This code is contributed by yuvraj_chandra
// Java program for the above approach
class GFG {

    /**
     * Strips every HTML tag from the given text and prints the remainder,
     * followed by a line break, to standard output.
     *
     * @param str text that may contain HTML tags
     */
    static void RemoveHTMLTags(String str)
    {
        // [^>]* is used instead of .*? because, without DOTALL, '.' does
        // not match line terminators, so a tag split across lines would
        // be left in place.
        str = str.replaceAll("<[^>]*>", "");

        // Print string after removing tags
        System.out.println(str);
    }

    // Driver Code
    public static void main(String[] args)
    {
        // Given String
        String str = "<div><b>Geeks for Geeks</b></div>";

        // Function call to print the
        // HTML string after removing tags
        RemoveHTMLTags(str);
    }
}
# Python3 program for the
# above approach
import re
# Function to remove the HTML tags
# from the given string
def RemoveHTMLTags(strr):
    """Print ``strr`` with every HTML tag removed.

    A tag is a '<', one or more characters other than '>', and the
    closing '>'. Each tag is replaced by the empty string and the
    result is printed.
    """
    # Print string after removing tags
    print(re.compile(r'<[^>]+>').sub('', strr))
# Driver code
if __name__ == '__main__':
    # Given String
    strr = "<div><b>Geeks for Geeks</b></div>"

    # Function call to print the HTML
    # string after removing tags
    RemoveHTMLTags(strr)
# This code is contributed by vikas_g
// C# program for the above approach
using System;
class GFG{

    // Prints the given text with every HTML tag stripped out
    static void RemoveHTMLTags(String str)
    {
        // A tag is '<', any run of characters other than '>', then '>'
        var tagPattern =
            new System.Text.RegularExpressions.Regex("<[^>]*>");

        // Replace each tag with the empty string and print the result
        Console.WriteLine(tagPattern.Replace(str, ""));
    }

    // Driver code
    public static void Main(String []args)
    {
        // Sample markup, handed straight to the tag stripper
        RemoveHTMLTags("<div><b>Geeks for Geeks</b></div>");
    }
}
// This code is contributed by vikas_g
<script>
// JavaScript program for the above approach
/**
 * Strips every HTML tag from the given value and writes the remaining
 * text into the page with document.write().
 *
 * @param {string} s - Text that may contain HTML tags.
 */
function RemoveHTMLTags(s) {
    // The "g" flag is essential: without it replace() removes only the
    // first match and the remaining tags are written out as markup.
    // [^>]* is used instead of .*? because "." does not match line
    // terminators, so a tag split across lines would be missed.
    const pattern = /<[^>]*>/g;

    // Erase every tag enclosed in <>
    s = String(s).replace(pattern, "");

    // Print string after removing tags
    document.write(s);
}
// Driver Code
// Sample markup whose tags are to be stripped
let markup = "<div><b>Geeks for Geeks</b></div>";
// Print the markup with every tag removed
RemoveHTMLTags(markup);
</script>
Output:
Geeks for Geeks
Time Complexity: O(N) where N is the length of the given string.
Auxiliary Space: O(N), since the tag-free result is built as a new string.