import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.JOptionPane;
import java.sql.*;
public class StringExtractor
{
/**
 * Program entry point; the body is empty, so running the class performs no work.
 *
 * @param ar command-line arguments (not read)
 */
public static void main(String[] ar)
{
    // Nothing to do: every other member of this class is a static helper.
}
/**
 * Copies the elements of an array into a new {@link ArrayList}, keeping their order.
 *
 * @param ob array to copy; a null array causes a NullPointerException
 * @return a new list holding the same element references as {@code ob}
 */
public static ArrayList arrayListConvert(Object[] ob)
{
    ArrayList copy = new ArrayList(ob.length);
    for (Object item : ob)
    {
        copy.add(item);
    }
    return copy;
}
//-------------------------Cartesian Product---------------------------------------
/**
 * Builds every combination obtained by picking one alternative from each element.
 *
 * <p>Each entry of {@code elements} is cast to String and split on {@code seperator}
 * by {@code seperaterPattern} (which drops empty pieces); the resulting sets are
 * handed to {@code generate}, which joins each combination with commas and varies
 * the first element's alternatives fastest.
 *
 * <pre>
 * EXAMPLE INPUT:
 *   Object[] elements = {"n|p", "adj|adv", "n|v"}
 *   char seperator    = '|'
 * OUTPUT:
 *   n,adj,n
 *   p,adj,n
 *   n,adv,n
 *   p,adv,n
 *   n,adj,v
 *   p,adj,v
 *   n,adv,v
 *   p,adv,v
 * </pre>
 *
 * @param elements  Strings, each listing alternatives delimited by {@code seperator}
 * @param seperator character that delimits the alternatives inside one element
 * @return the array returned by {@code generate} for the parsed sets
 * @throws ClassCastException if an entry of {@code elements} is not a String
 */
public static Object[] cartesianProduct(Object[] elements,char seperator)
{
    String[][] sets = new String[elements.length][];
    for (int i = 0; i < elements.length; i++)
    {
        // Split each element exactly once and copy the pieces into a String[] row.
        Object[] alternatives = StringExtractor.seperaterPattern((String) elements[i], seperator);
        sets[i] = new String[alternatives.length];
        for (int j = 0; j < alternatives.length; j++)
        {
            sets[i][j] = (String) alternatives[j];
        }
    }
    return generate(sets);
}
/**
 * Enumerates the Cartesian product of {@code sets} as comma-joined Strings.
 *
 * <p>Combination {@code i} takes element {@code (i / j) % set.length} from each set,
 * where {@code j} is the product of the lengths of the preceding sets, so the first
 * set varies fastest. If any set is empty the product is empty. An empty
 * {@code sets} array yields an empty result.
 *
 * @param sets the alternatives for each position; rows must not be null
 * @return an {@code Object[]} of Strings, one per combination
 */
public static Object[] generate(String[][] sets)
{
    ArrayList<Object> combinations = new ArrayList<Object>();
    if (sets.length == 0)
    {
        // Nothing to combine; avoids trimming a separator from an empty string.
        return combinations.toArray();
    }

    int solutions = 1;
    for (String[] set : sets)
    {
        solutions *= set.length;
    }

    for (int i = 0; i < solutions; i++)
    {
        StringBuilder line = new StringBuilder();
        int divisor = 1;
        for (int s = 0; s < sets.length; s++)
        {
            String[] set = sets[s];
            if (s > 0)
            {
                line.append(',');
            }
            line.append(set[(i / divisor) % set.length]);
            divisor *= set.length;
        }
        combinations.add(line.toString());
    }
    return combinations.toArray();
}
/**
 * Converts text to sentence case: everything is lower-cased, then the first letter
 * of the text and the first letter following each {@code '.'}, {@code '!'} or
 * {@code '?'} is upper-cased.
 *
 * <p>A digit at the start of a sentence consumes the "capitalize next" state, so
 * {@code "3.14 is pi"} is left unchanged rather than becoming {@code "3.14 Is pi"}.
 *
 * @param source text to convert; must not be null
 * @return the converted text, with the same length as {@code source.toLowerCase()}
 */
public static String sentenceCase(String source)
{
    String lower = source.toLowerCase();
    StringBuilder result = new StringBuilder(lower.length());
    boolean atSentenceStart = true;
    for (int i = 0; i < lower.length(); i++)
    {
        char c = lower.charAt(i);
        if (atSentenceStart && Character.isLetterOrDigit(c))
        {
            // Character.toUpperCase leaves digits untouched.
            result.append(Character.toUpperCase(c));
            atSentenceStart = false;
        }
        else
        {
            result.append(c);
            if (c == '.' || c == '!' || c == '?')
            {
                atSentenceStart = true;
            }
        }
    }
    return result.toString();
}
/**
 * Rotates a string to the right: each step moves the last character to the front.
 *
 * <p>The rotation count is reduced modulo the string length, so large counts cost
 * no extra work. An empty string or a non-positive count returns {@code word}
 * unchanged.
 *
 * @param word   string to rotate; must not be null
 * @param rotate number of single-character right rotations
 * @return the rotated string, e.g. {@code ("abcde", 2)} gives {@code "deabc"}
 */
public static String stringRotator(String word,int rotate)
{
    int length = word.length();
    if (length == 0 || rotate <= 0)
    {
        return word;
    }
    int shift = rotate % length;
    if (shift == 0)
    {
        return word;
    }
    int split = length - shift;
    return word.substring(split) + word.substring(0, split);
}
/**
 * Rotates a string to the left: each step moves the first character to the end.
 *
 * <p>The rotation count is reduced modulo the string length. An empty string or a
 * non-positive count returns {@code word} unchanged.
 *
 * @param word   string to rotate; must not be null
 * @param rotate number of single-character left rotations
 * @return the rotated string, e.g. {@code ("abcde", 2)} gives {@code "cdeab"}
 */
public static String stringReverseRotator(String word,int rotate)
{
    int length = word.length();
    if (length == 0 || rotate <= 0)
    {
        return word;
    }
    int shift = rotate % length;
    if (shift == 0)
    {
        return word;
    }
    return word.substring(shift) + word.substring(0, shift);
}
/**
 * Applies a Caesar shift to the ASCII letters of {@code source}.
 *
 * <p>Lower-case letters stay lower-case and upper-case letters stay upper-case;
 * every other character is copied unchanged. The shift is normalised into
 * {@code 0..25}, so negative values and values above 26 wrap around the alphabet.
 *
 * @param source text to shift; must not be null
 * @param shift  number of alphabet positions to move each letter forward
 * @return the shifted text
 */
public static String encrypter(String source,int shift)
{
    // Java's % keeps the sign of the dividend, hence the extra +26.
    int offset = ((shift % 26) + 26) % 26;
    StringBuilder result = new StringBuilder(source.length());
    for (int i = 0; i < source.length(); i++)
    {
        char c = source.charAt(i);
        if (c >= 'a' && c <= 'z')
        {
            result.append((char) ('a' + (c - 'a' + offset) % 26));
        }
        else if (c >= 'A' && c <= 'Z')
        {
            result.append((char) ('A' + (c - 'A' + offset) % 26));
        }
        else
        {
            result.append(c);
        }
    }
    return result.toString();
}
/**
 * Caesar-shifts {@code source} by {@code shift} positions.
 *
 * <p>The returned value is exactly what {@code encrypter(source, shift)} produces.
 * Earlier code in this method also built rotated copies of the text, but that
 * value was never part of the result, so the step is omitted here.
 * NOTE(review): if per-word rotation was meant to be part of the cipher, it has to
 * be introduced together with the matching inverse in {@code rDecrypter}.
 *
 * @param source text to shift; must not be null
 * @param shift  number of alphabet positions to move each letter forward
 * @return the shifted text
 */
public static String rEncrypter(String source,int shift)
{
    return StringExtractor.encrypter(source, shift);
}
/**
 * Reverses a Caesar shift of {@code shift} positions by shifting forward by
 * {@code 26 - shift}.
 *
 * <p>The returned value is exactly what {@code encrypter(source, 26 - shift)}
 * produces. Earlier code in this method also built reverse-rotated copies of the
 * text, but that value was never part of the result, so the step is omitted here.
 *
 * @param source shifted text; must not be null
 * @param shift  the shift that was used to produce {@code source}
 * @return the text shifted back
 */
public static String rDecrypter(String source,int shift)
{
    return StringExtractor.encrypter(source, 26 - shift);
}
/**
 * Splits text into words on the space character.
 *
 * <p>Empty pieces between spaces are skipped, but whatever follows the last space
 * (or the whole text when there is no space) is always appended, even when it is
 * empty. So {@code "a b "} yields {@code ["a", "b", ""]} and {@code ""} yields
 * {@code [""]}.
 *
 * @param source text to split; must not be null
 * @return the words as an {@code Object[]} of Strings, never empty
 */
public static Object[] spaceSeperater(String source)
{
    ArrayList<Object> words = new ArrayList<Object>();
    int wordStart = 0;
    int space = source.indexOf(' ');
    while (space >= 0)
    {
        if (space > wordStart)
        {
            words.add(source.substring(wordStart, space));
        }
        wordStart = space + 1;
        space = source.indexOf(' ', wordStart);
    }
    // The remainder after the last space is kept unconditionally.
    words.add(source.substring(wordStart));
    return words.toArray();
}
/**
 * Lists the text under every shift from 1 to 25, one candidate per line.
 *
 * @param source text passed to {@code encrypter} for each shift
 * @return the 25 results of {@code encrypter(source, shift)}, each followed by a newline
 */
public static String decrypter(String source)
{
    StringBuilder candidates = new StringBuilder();
    int shift = 1;
    while (shift <= 25)
    {
        candidates.append(StringExtractor.encrypter(source, shift)).append("\n");
        shift++;
    }
    return candidates.toString();
}
/**
 * Removes single-digit bracket markers such as {@code [1]} from text.
 *
 * <p>Each {@code '['}, ASCII digit, {@code ']'} triple is blanked out, then runs of
 * three spaces and afterwards runs of two spaces are replaced by one space so the
 * gap left by a marker closes up. The same replacement also affects such runs
 * elsewhere in the text. Markers with more than one digit are left alone.
 * NOTE(review): the collapse step assumes the intended replacements were
 * "three spaces to one" and "two spaces to one" — confirm.
 *
 * @param source text to clean; must not be null
 * @return the text without single-digit bracket markers
 */
public static String sqBracketRemover(String source)
{
    char[] chars = source.toCharArray();
    for (int i = 0; i + 2 < chars.length; i++)
    {
        boolean digitInside = chars[i + 1] >= '0' && chars[i + 1] <= '9';
        if (chars[i] == '[' && chars[i + 2] == ']' && digitInside)
        {
            chars[i] = ' ';
            chars[i + 1] = ' ';
            chars[i + 2] = ' ';
        }
    }
    // Built by concatenation so the run lengths cannot be lost to whitespace reformatting.
    String one = " ";
    String two = one + one;
    String three = two + one;
    return new String(chars).replace(three, one).replace(two, one);
}
/**
 * Lists all 25 Caesar shifts of {@code source} and, when possible, appends the
 * first shift that contains a word found in the database.
 *
 * <p>The listing has one line per shift in the form {@code "<shift>. <text>\n"}.
 * Each candidate is split with {@code seperater}; every word starting with
 * {@code a..z} is looked up (lower-cased) in the table named after its first letter
 * via {@code Select word from <letter>}. On the first hit the result is the
 * listing, a blank line, and the matching candidate.
 *
 * <p>Any exception (including a failed connection) is treated as "no match": the
 * listing built so far is returned.
 *
 * @param source text to decode
 * @return the listing, optionally followed by {@code "\n\n"} and the matching candidate
 */
public static String sensibleDecrypter(String source)
{
    StringBuilder listing = new StringBuilder();
    Statement st = null;
    try
    {
        // Build the listing first so it survives a database failure.
        String[] combination = new String[25];
        for (int shift = 1; shift < 26; shift++)
        {
            combination[shift - 1] = StringExtractor.encrypter(source, shift);
            listing.append(shift).append(". ").append(combination[shift - 1]).append("\n");
        }

        // NOTE(review): who owns the connection returned by Misc.connect is not
        // visible here, so it is deliberately not closed — confirm.
        Connection con = Misc.connect(null);
        st = con.createStatement();
        for (String candidate : combination)
        {
            Object[] words = StringExtractor.seperater(candidate);
            for (Object entry : words)
            {
                if (tableContainsWord(st, ((String) entry).toLowerCase()))
                {
                    return listing + "\n\n" + candidate;
                }
            }
        }
    }
    catch (Exception ignored)
    {
        // Best effort: without a usable lookup the plain listing is still returned.
    }
    finally
    {
        if (st != null)
        {
            try
            {
                st.close();
            }
            catch (SQLException ignored)
            {
                // Nothing useful can be done if closing fails.
            }
        }
    }
    return listing.toString();
}

/** Returns true when {@code word} starts with a-z and its letter table lists it (case-insensitively). */
private static boolean tableContainsWord(Statement st, String word) throws SQLException
{
    char first = word.charAt(0);
    if (first < 'a' || first > 'z')
    {
        return false;
    }
    // The table name is a single validated letter, so concatenation cannot inject SQL.
    ResultSet rs = st.executeQuery("Select word from " + first);
    try
    {
        while (rs.next())
        {
            if (word.equals(rs.getString("word").toLowerCase()))
            {
                return true;
            }
        }
        return false;
    }
    finally
    {
        rs.close();
    }
}
//--------------------------------------------------------------------------------
/**
 * Counts case-sensitive occurrences of {@code word} inside {@code source}.
 *
 * <p>Occurrences may overlap ({@code "aaaa"} contains {@code "aa"} three times) and
 * need not be delimited by spaces. An empty {@code word} counts as zero
 * occurrences.
 *
 * @param source text to search; must not be null
 * @param word   text to look for; must not be null
 * @return the number of positions at which {@code word} occurs
 */
public static int countExactWord(String source,String word)
{
    if (word.length() == 0)
    {
        return 0;
    }
    int found = 0;
    int at = source.indexOf(word);
    while (at >= 0)
    {
        found++;
        // Advance by one, not by word.length(), so overlapping matches count.
        at = source.indexOf(word, at + 1);
    }
    return found;
}
/**
 * Counts occurrences of {@code word} inside {@code source}, ignoring case.
 *
 * <p>Both arguments are lower-cased before searching. Occurrences may overlap and
 * need not be delimited by spaces. An empty {@code word} counts as zero
 * occurrences.
 *
 * @param source text to search; must not be null
 * @param word   text to look for; must not be null
 * @return the number of positions at which {@code word} occurs
 */
public static int countWord(String source,String word)
{
    String haystack = source.toLowerCase();
    String needle = word.toLowerCase();
    if (needle.length() == 0)
    {
        return 0;
    }
    int found = 0;
    int at = haystack.indexOf(needle);
    while (at >= 0)
    {
        found++;
        // Advance by one so overlapping matches count.
        at = haystack.indexOf(needle, at + 1);
    }
    return found;
}
/**
 * Splits text into sentences at each {@code '.'}.
 *
 * <p>Leading spaces are removed from every sentence and empty sentences are
 * skipped. Sentences that were terminated by a dot get the dot back; text after
 * the last dot is returned without one, and only when it is not blank.
 *
 * @param source text to split; must not be null
 * @return the sentences as an {@code Object[]} of Strings (possibly empty)
 */
public static Object[] sentenceSeperator(String source)
{
    ArrayList<Object> sentences = new ArrayList<Object>();
    int length = source.length();
    int start = 0;
    while (start < length)
    {
        int dot = source.indexOf('.', start);
        int end = (dot < 0) ? length : dot;

        int begin = start;
        while (begin < end && source.charAt(begin) == ' ')
        {
            begin++;
        }
        if (begin < end)
        {
            String sentence = source.substring(begin, end);
            // Only sentences that actually ended with a dot get it re-attached.
            sentences.add(dot < 0 ? sentence : sentence + ".");
        }
        start = end + 1;
    }
    return sentences.toArray();
}
/**
 * Splits text into lines at each {@code '\n'}.
 *
 * <p>Leading spaces are removed from every line and lines that are empty after
 * that are skipped. The newline characters themselves are never part of a
 * returned line.
 *
 * @param source text to split; must not be null
 * @return the non-empty lines as an {@code Object[]} of Strings (possibly empty)
 */
public static Object[] newLineSeperator(String source)
{
    ArrayList<Object> lines = new ArrayList<Object>();
    int length = source.length();
    int lineStart = 0;
    while (lineStart < length)
    {
        int lineEnd = source.indexOf('\n', lineStart);
        if (lineEnd < 0)
        {
            lineEnd = length;
        }
        int begin = lineStart;
        // Bounded by lineEnd so trailing blanks cannot run off the end of the text.
        while (begin < lineEnd && source.charAt(begin) == ' ')
        {
            begin++;
        }
        if (begin < lineEnd)
        {
            lines.add(source.substring(begin, lineEnd));
        }
        lineStart = lineEnd + 1;
    }
    return lines.toArray();
}
/**
 * Extracts the text that follows each occurrence of {@code word}, up to (not
 * including) the next {@code controlChar}.
 *
 * <p>Every extracted value is followed by {@code "\n"} in the result. Searching
 * resumes after the terminating {@code controlChar}. An occurrence whose value is
 * not terminated by {@code controlChar} ends the scan without being emitted. An
 * empty {@code word} yields an empty result.
 *
 * @param source      text to scan; must not be null
 * @param word        marker that precedes each value; must not be null
 * @param controlChar character that terminates each value
 * @return the extracted values, one per line
 */
public static String stringExtractor(String source,String word,char controlChar)
{
    StringBuilder result = new StringBuilder();
    if (word.length() == 0)
    {
        return result.toString();
    }
    int markerAt = source.indexOf(word);
    while (markerAt >= 0)
    {
        int valueStart = markerAt + word.length();
        int valueEnd = source.indexOf(controlChar, valueStart);
        if (valueEnd < 0)
        {
            // Unterminated value: nothing further can be extracted.
            break;
        }
        result.append(source, valueStart, valueEnd).append("\n");
        markerAt = source.indexOf(word, valueEnd + 1);
    }
    return result.toString();
}
/**
 * Returns the text between the first occurrence of {@code start} and the first
 * occurrence of {@code end} that follows it; neither marker is included.
 *
 * <p>{@code end} is searched for only after the end of {@code start}, so the two
 * markers may be equal or overlap in content. If either marker is missing the
 * result is the empty string.
 *
 * @param source text to search; must not be null
 * @param start  opening marker; must not be null
 * @param end    closing marker; must not be null
 * @return the enclosed text, or {@code ""} when a marker is not found
 */
public static String stringExtractor(String source,String start,String end)
{
    int startAt = source.indexOf(start);
    if (startAt < 0)
    {
        return "";
    }
    int contentStart = startAt + start.length();
    int endAt = source.indexOf(end, contentStart);
    if (endAt < 0)
    {
        return "";
    }
    return source.substring(contentStart, endAt);
}
/**
 * Returns the text from the first occurrence of {@code start} (inclusive) up to
 * the first occurrence of {@code end} that follows it (exclusive).
 *
 * <p>{@code end} is searched for only after the end of {@code start}, and the whole
 * of {@code source} is considered, including its last character. If either marker
 * is missing the result is the empty string.
 *
 * @param source text to search; must not be null
 * @param start  opening marker, kept in the result; must not be null
 * @param end    closing marker, not kept in the result; must not be null
 * @return {@code start} plus the enclosed text, or {@code ""} when a marker is not found
 */
public static String stringWithHeadExtractor(String source, String start,String end)
{
    int startAt = source.indexOf(start);
    if (startAt < 0)
    {
        return "";
    }
    int endAt = source.indexOf(end, startAt + start.length());
    if (endAt < 0)
    {
        return "";
    }
    return source.substring(startAt, endAt);
}
/**
 * Collects the text that follows each occurrence of {@code word}, up to (not
 * including) the next {@code controlChar}.
 *
 * <p>Searching resumes after the terminating {@code controlChar}. An occurrence
 * whose value is not terminated by {@code controlChar} ends the scan without
 * being collected. An empty {@code word} yields an empty array.
 *
 * @param source      text to scan; must not be null
 * @param word        marker that precedes each value; must not be null
 * @param controlChar character that terminates each value
 * @return the extracted values as an {@code Object[]} of Strings
 */
public static Object[] stringArrayExtractor(String source,String word,char controlChar)
{
    ArrayList<Object> values = new ArrayList<Object>();
    if (word.length() == 0)
    {
        return values.toArray();
    }
    int markerAt = source.indexOf(word);
    while (markerAt >= 0)
    {
        int valueStart = markerAt + word.length();
        int valueEnd = source.indexOf(controlChar, valueStart);
        if (valueEnd < 0)
        {
            // Unterminated value: nothing further can be extracted.
            break;
        }
        values.add(source.substring(valueStart, valueEnd));
        markerAt = source.indexOf(word, valueEnd + 1);
    }
    return values.toArray();
}
/**
 * Splits text on a single delimiter character, e.g. {@code "prem, bharti, ravi"}
 * on {@code ','}.
 *
 * <p>A space is appended to the text first so that the final piece is flushed when
 * the scan reaches that last position; empty pieces are dropped and pieces are not
 * trimmed.
 *
 * @param source  text to split
 * @param control delimiter character
 * @return the non-empty pieces as an {@code Object[]} of Strings
 */
public static Object[] seperaterPattern(String source,char control)
{
    String padded = source + " ";
    int last = padded.length() - 1;
    ArrayList<Object> pieces = new ArrayList<Object>();
    int pieceStart = 0;
    for (int i = 0; i <= last; i++)
    {
        boolean boundary = (padded.charAt(i) == control) || (i == last);
        if (!boundary)
        {
            continue;
        }
        if (i > pieceStart)
        {
            pieces.add(padded.substring(pieceStart, i));
        }
        pieceStart = i + 1;
    }
    return pieces.toArray();
}
/**
 * Splits text into words on space, comma, newline and question mark.
 *
 * <p>A space is appended to the text first so the final word is flushed; empty
 * pieces are dropped, and the delimiters themselves are not returned.
 *
 * @param source text to split
 * @return the words as an {@code Object[]} of Strings
 */
public static Object[] seperater(String source)
{
    String padded = source + " ";
    ArrayList<Object> words = new ArrayList<Object>();
    int wordStart = 0;
    for (int i = 0; i < padded.length(); i++)
    {
        switch (padded.charAt(i))
        {
            case ' ':
            case ',':
            case '\n':
            case '?':
                if (i > wordStart)
                {
                    words.add(padded.substring(wordStart, i));
                }
                wordStart = i + 1;
                break;
            default:
                break;
        }
    }
    return words.toArray();
}
/**
 * Splits text into words on space, newline, question mark and comma, keeping each
 * comma as its own {@code ","} token.
 *
 * <p>Empty pieces between delimiters are skipped, but whatever follows the last
 * delimiter (or the whole text when there is none) is always appended, even when
 * it is empty.
 *
 * @param source text to split; must not be null
 * @return words and comma tokens as an {@code Object[]} of Strings, never empty
 */
public static Object[] commaSeperater(String source)
{
    ArrayList<Object> tokens = new ArrayList<Object>();
    int tokenStart = 0;
    int length = source.length();
    for (int i = 0; i < length; i++)
    {
        char c = source.charAt(i);
        boolean comma = (c == ',');
        if (comma || c == ' ' || c == '\n' || c == '?')
        {
            if (i > tokenStart)
            {
                tokens.add(source.substring(tokenStart, i));
            }
            tokenStart = i + 1;
            if (comma)
            {
                tokens.add(",");
            }
        }
    }
    // The remainder after the last delimiter is kept unconditionally.
    tokens.add(source.substring(tokenStart));
    return tokens.toArray();
}
/**
 * Splits text into words on space, comma, newline, {@code '<'} and {@code '>'}.
 *
 * <p>Empty pieces between delimiters are skipped, but whatever follows the last
 * delimiter (or the whole text when there is none) is always appended, even when
 * it is empty.
 *
 * @param source text to split; must not be null
 * @return the words as an {@code Object[]} of Strings, never empty
 */
public static Object[] iSeperater(String source)
{
    ArrayList<Object> words = new ArrayList<Object>();
    int wordStart = 0;
    for (int i = 0; i < source.length(); i++)
    {
        switch (source.charAt(i))
        {
            case ' ':
            case ',':
            case '\n':
            case '<':
            case '>':
                if (i > wordStart)
                {
                    words.add(source.substring(wordStart, i));
                }
                wordStart = i + 1;
                break;
            default:
                break;
        }
    }
    // The remainder after the last delimiter is kept unconditionally.
    words.add(source.substring(wordStart));
    return words.toArray();
}
/**
 * Tells whether a character is an ASCII letter ({@code A-Z} or {@code a-z}).
 *
 * @param ch character to test
 * @return true for ASCII letters, false otherwise
 */
public static boolean isAlphabet(char ch)
{
    boolean upper = ch >= 'A' && ch <= 'Z';
    boolean lower = ch >= 'a' && ch <= 'z';
    return upper || lower;
}
/**
 * Tells whether a character is a lower-case ASCII letter ({@code a-z}).
 *
 * @param ch character to test
 * @return true for {@code a-z}, false otherwise
 */
public static boolean isLowerAlphabet(char ch)
{
    return ch >= 'a' && ch <= 'z';
}
/**
 * Tells whether a character is an ASCII digit ({@code 0-9}).
 *
 * @param ch character to test
 * @return true for {@code 0-9}, false otherwise
 */
public static boolean isNum(char ch)
{
    return ch >= '0' && ch <= '9';
}
/**
 * Returns the space-separated words of {@code source} that contain at least one
 * ASCII digit.
 *
 * <p>Every word is examined, including the last one and a text that has no space
 * at all; words without a digit are never returned.
 *
 * @param source text to scan; must not be null
 * @return the digit-bearing words, in order, as an {@code Object[]} of Strings
 */
public static Object[] alphaNumSeperater(String source)
{
    ArrayList<Object> matches = new ArrayList<Object>();
    int length = source.length();
    int wordStart = 0;
    // pos == length acts as a virtual trailing space so the final word is checked too.
    for (int pos = 0; pos <= length; pos++)
    {
        if (pos < length && source.charAt(pos) != ' ')
        {
            continue;
        }
        for (int i = wordStart; i < pos; i++)
        {
            char c = source.charAt(i);
            if (c >= '0' && c <= '9')
            {
                matches.add(source.substring(wordStart, pos));
                break;
            }
        }
        wordStart = pos + 1;
    }
    return matches.toArray();
}
/**
 * Lists every character from {@code start} to {@code end} inclusive as
 * one-character Strings.
 *
 * @param start first character of the range
 * @param end   last character of the range; a value below {@code start} gives an empty result
 * @return the characters of the range, in ascending order
 */
public static Object[] charIterator(char start,char end)
{
    ArrayList<Object> letters = new ArrayList<Object>();
    // An int counter avoids char overflow when end is Character.MAX_VALUE.
    int code = start;
    while (code <= end)
    {
        letters.add(String.valueOf((char) code));
        code++;
    }
    return letters.toArray();
}
/**
 * Returns the characters two strings share, counting repeats (a multiset
 * intersection).
 *
 * <p>The shorter string is walked left to right ({@code second} when the lengths
 * are equal); each of its characters that is still available in the other string
 * is appended to the result and removed from that string, so a character is
 * matched at most as often as it occurs in both.
 *
 * @param first  one string; must not be null
 * @param second the other string; must not be null
 * @return the shared characters, in the order they appear in the walked string
 */
public static String commonCharacters(String first, String second)
{
    String pool = first;
    String probe = second;
    if (second.length() > first.length())
    {
        pool = second;
        probe = first;
    }
    StringBuilder remaining = new StringBuilder(pool);
    StringBuilder matched = new StringBuilder();
    for (int i = 0; i < probe.length() && remaining.length() > 0; i++)
    {
        int hit = remaining.indexOf(String.valueOf(probe.charAt(i)));
        if (hit >= 0)
        {
            matched.append(remaining.charAt(hit));
            remaining.deleteCharAt(hit);
        }
    }
    return matched.toString();
}
/**
 * Counts the characters two strings share, counting repeats (the size of their
 * multiset intersection).
 *
 * <p>The shorter string is walked left to right; each of its characters that is
 * still available in the other string is counted once and removed from that
 * string.
 *
 * @param first  one string; must not be null
 * @param second the other string; must not be null
 * @return the number of shared characters
 */
public static int noOfCommonCharacters(String first, String second)
{
    String pool = first;
    String probe = second;
    if (second.length() > first.length())
    {
        pool = second;
        probe = first;
    }
    char[] remaining = pool.toCharArray();
    int remainingCount = remaining.length;
    int common = 0;
    for (int i = 0; i < probe.length() && remainingCount > 0; i++)
    {
        char wanted = probe.charAt(i);
        for (int k = 0; k < remainingCount; k++)
        {
            if (remaining[k] == wanted)
            {
                // Remove slot k by shifting the tail left one place.
                System.arraycopy(remaining, k + 1, remaining, k, remainingCount - k - 1);
                remainingCount--;
                common++;
                break;
            }
        }
    }
    return common;
}
/**
 * Tells whether any entry of {@code checklist} equals any entry of {@code ob}
 * once both are lower-cased.
 *
 * @param ob        Strings to search in
 * @param checklist Strings to look for
 * @return true on the first case-insensitive match, false when there is none
 * @throws ClassCastException if a compared entry is not a String
 */
public static boolean containsList(Object[] ob,Object[] checklist)
{
    for (Object wanted : checklist)
    {
        for (Object candidate : ob)
        {
            String wantedLower = ((String) wanted).toLowerCase();
            String candidateLower = ((String) candidate).toLowerCase();
            if (wantedLower.equals(candidateLower))
            {
                return true;
            }
        }
    }
    return false;
}
/**
 * Collects the entries of {@code checklist} that equal an entry of {@code ob}
 * once both are lower-cased.
 *
 * <p>A checklist entry is added once per matching entry of {@code ob}, so it can
 * appear several times in the result.
 *
 * @param ob        Strings to search in
 * @param checklist Strings to look for
 * @return the matching checklist entries (original casing), in checklist order
 * @throws ClassCastException if a compared entry is not a String
 */
public static Object[] containsListExtract(Object[] ob,Object[] checklist)
{
    ArrayList<Object> matches = new ArrayList<Object>();
    for (Object wanted : checklist)
    {
        for (Object candidate : ob)
        {
            String wantedLower = ((String) wanted).toLowerCase();
            String candidateLower = ((String) candidate).toLowerCase();
            if (wantedLower.equals(candidateLower))
            {
                matches.add((String) wanted);
            }
        }
    }
    return matches.toArray();
}
/**
 * Returns the words of {@code source} that consist only of ASCII digits.
 *
 * <p>Words are obtained from {@code seperater}; a word is kept when every one of
 * its characters lies in {@code '0'..'9'}.
 *
 * @param source text to scan
 * @return the all-digit words, in order, as an {@code Object[]} of Strings
 */
public static Object[] NumSeperater(String source)
{
    Object[] tokens = StringExtractor.seperater(source);
    ArrayList<Object> numbers = new ArrayList<Object>();
    // Declared outside the loop: the flag is only reassigned while scanning characters.
    boolean allDigits = false;
    for (Object token : tokens)
    {
        String text = (String) token;
        int k = 0;
        while (k < text.length())
        {
            char c = text.charAt(k);
            allDigits = c >= '0' && c <= '9';
            if (!allDigits)
            {
                break;
            }
            k++;
        }
        if (allDigits)
        {
            numbers.add(text);
        }
    }
    return numbers.toArray();
}
/**
 * Removes duplicate Strings, keeping the last occurrence of each value.
 *
 * <p>An entry is kept only when no equal entry appears later in the array, so the
 * result follows the order of the last occurrences:
 * {@code ["a","b","a","c","b"]} becomes {@code ["a","c","b"]}.
 *
 * @param ob Strings to filter
 * @return the distinct values as an {@code Object[]} of Strings
 * @throws ClassCastException if an entry is not a String
 */
public static Object[] uniqueFilter(Object[] ob)
{
    ArrayList<Object> kept = new ArrayList<Object>();
    for (int i = 0; i < ob.length; i++)
    {
        String current = (String) ob[i];
        boolean repeatedLater = false;
        // Every later entry is compared (no early exit), as a full count would do.
        for (int j = i + 1; j < ob.length; j++)
        {
            if (current.equals((String) ob[j]))
            {
                repeatedLater = true;
            }
        }
        if (!repeatedLater)
        {
            kept.add(current);
        }
    }
    return kept.toArray();
}
/**
 * Finds every non-empty match of a regular expression and returns the matches
 * trimmed of surrounding whitespace.
 *
 * <p>Example patterns:
 * <pre>
 * " [A-Za-z]{1,100}"   alphabetic runs of length 1 to 100 that start with a space
 * "[0-9]{1,100}"       all the numbers in the string
 * " [0-9]{1,100}"      all the numbers that start with a space
 * </pre>
 *
 * @param theRegex  regular expression to search for
 * @param str2Check text to search in
 * @return the trimmed matches, in order of appearance, as an {@code Object[]} of Strings
 */
public static Object[] patternExtractor(String theRegex, String str2Check)
{
    Matcher matcher = Pattern.compile(theRegex).matcher(str2Check);
    ArrayList<Object> hits = new ArrayList<Object>();
    while (matcher.find())
    {
        String hit = matcher.group();
        if (hit.length() == 0)
        {
            continue;
        }
        hits.add(hit.trim());
    }
    return hits.toArray();
}
/**
 * Extracts e-mail-like strings from text.
 *
 * <p>The text is split with {@code iSeperater}. For every word containing both
 * {@code '@'} and {@code '.'}, the candidate is built around the first {@code '@'}:
 * to the left it extends over ASCII letters, digits, {@code '.'} and {@code '_'};
 * to the right over ASCII letters and {@code '.'}. A candidate is accepted when it
 * still contains a {@code '.'} and its last two characters are ASCII letters.
 * Duplicates are removed with {@code uniqueFilter}.
 *
 * @param source text to scan
 * @return the accepted candidates, as returned by {@code uniqueFilter}
 */
public static Object[] findEmail(String source)
{
    Object[] tokens = StringExtractor.iSeperater(source);
    ArrayList<Object> emails = new ArrayList<Object>();
    for (int i = 0; i < tokens.length; i++)
    {
        String token = (String) tokens[i];
        int at = token.indexOf('@');
        if (at < 0 || token.indexOf('.') < 0)
        {
            continue;
        }

        // Grow the local part leftwards from '@' while the characters are allowed.
        int localStart = at;
        while (localStart > 0)
        {
            char c = token.charAt(localStart - 1);
            if (!(StringExtractor.isAlphabet(c) || StringExtractor.isNum(c) || c == '.' || c == '_'))
            {
                break;
            }
            localStart--;
        }

        // Grow the domain part rightwards from '@' while the characters are allowed.
        int domainEnd = at + 1;
        while (domainEnd < token.length())
        {
            char c = token.charAt(domainEnd);
            if (!(StringExtractor.isAlphabet(c) || c == '.'))
            {
                break;
            }
            domainEnd++;
        }

        String candidate = token.substring(localStart, domainEnd);
        int length = candidate.length();
        // The candidate can shrink to just "@"; check the length before reading its tail.
        if (length >= 2
                && candidate.contains(".")
                && StringExtractor.isAlphabet(candidate.charAt(length - 1))
                && StringExtractor.isAlphabet(candidate.charAt(length - 2)))
        {
            emails.add(candidate);
        }
    }
    return StringExtractor.uniqueFilter(emails.toArray());
}
}
// Web-page footer captured together with the listing (not part of the program): "No comments: / Post a Comment"