類似驗證碼的「文字轉換為阿拉伯數字」

分享如何把中文國字數字(例如「一二三」)轉換成阿拉伯數字(例如「123」)。

Python 腳本:

def normalize_chinese_numeric(keyword):
    """Convert every recognisable numeral character in ``keyword`` to an Arabic digit.

    Each character is looked up on its own through ``chinese_numeric_to_int``;
    characters for which that lookup returns ``None`` are dropped.

    Args:
        keyword: The text to convert; it is iterated one character at a time.

    Returns:
        A string holding the decimal digits that were found, in input order.
    """
    digits = []
    for symbol in keyword:
        value = chinese_numeric_to_int(symbol)
        if value is None:
            # Not a known numeral: skip it instead of failing.
            continue
        digits.append(str(value))
    return "".join(digits)

def chinese_numeric_to_int(char):
    """Return the digit that ``char`` stands for, or ``None`` when it is unknown.

    ``char`` is lower-cased and compared for equality against every alias
    listed by ``get_chinese_numeric``; the first digit whose alias list
    contains it wins.

    Args:
        char: The text to look up (callers in this file pass one character).

    Returns:
        The matching digit as an ``int``, or ``None`` if no alias matches.
    """
    needle = char.lower()
    for digit_text, aliases in get_chinese_numeric().items():
        if needle in aliases:
            return int(digit_text)
    return None

def get_chinese_numeric():
    """Build the alias table used to recognise digits.

    Returns:
        A new dict on every call. Keys are the digit strings '0'..'9' and each
        value is the list of spellings accepted for that digit.
    """
    return {
        '0': ['0', '0', 'zero', '零'],
        '1': ['1', '1', 'one', '一', '壹', '①', '❶', '⑴'],
        '2': ['2', '2', 'two', '二', '貳', '②', '❷', '⑵'],
        '3': ['3', '3', 'three', '三', '叁', '③', '❸', '⑶'],
        '4': ['4', '4', 'four', '四', '肆', '④', '❹', '⑷'],
        '5': ['5', '5', 'five', '五', '伍', '⑤', '❺', '⑸'],
        '6': ['6', '6', 'six', '六', '陸', '⑥', '❻', '⑹'],
        '7': ['7', '7', 'seven', '七', '柒', '⑦', '❼', '⑺'],
        '8': ['8', '8', 'eight', '八', '捌', '⑧', '❽', '⑻'],
        '9': ['9', '9', 'nine', '九', '玖', '⑨', '❾', '⑼'],
    }

def format_quota_string(formated_html_text):
    """Normalise every supported bracket/quote character to 【 and 】.

    All occurrences of each opening character become '【' and all occurrences
    of each closing character become '】'; every other character is kept.

    Args:
        formated_html_text: The text whose brackets should be unified.

    Returns:
        A new string with the bracket characters replaced.
    """
    # NOTE(review): the original replacement list named '[' / '(' (and
    # ']' / ')') twice; possibly full-width variants were intended - confirm.
    opening_marks = '「『〔﹝〈《[〖('
    closing_marks = '」』〕﹞〉》]〗)'

    # One translation table replaces the whole chain of single-character
    # replacements; no target character is also a source, so order is moot.
    table = {ord(mark): '【' for mark in opening_marks}
    table.update({ord(mark): '】' for mark in closing_marks})
    return formated_html_text.translate(table)

def find_between(s, first, last):
    """Return the text of ``s`` located between ``first`` and ``last``.

    The search for ``last`` begins right after the first occurrence of
    ``first``.

    Args:
        s: The string to search in.
        first: The opening delimiter.
        last: The closing delimiter.

    Returns:
        The enclosed substring, or an empty string when either delimiter
        cannot be found.
    """
    opening_at = s.find(first)
    if opening_at < 0:
        return ""

    begin = opening_at + len(first)
    closing_at = s.find(last, begin)
    if closing_at < 0:
        return ""

    return s[begin:closing_at]


# Demo: pull the bracketed part out of a captcha prompt and turn it into digits.
captcha_text_div_text = "請將括弧內文字轉換為阿拉伯數字(一二三四五六)"
# Unify every bracket style to 【 】 so a single delimiter pair can be searched for.
formated_html_text = format_quota_string(captcha_text_div_text)

temp_answer = find_between(formated_html_text, "【", "】")
temp_answer = temp_answer.strip()
if len(temp_answer) > 0:
    # Drop spaces that may separate the characters inside the brackets.
    temp_answer = temp_answer.replace(' ','')

    # Check the raw question: convert only when it mentions digits ('數字'),
    # text ('文字') and brackets ('括'). The original tested '數字' twice;
    # the JavaScript version of this script tests '文字' as the second keyword.
    if '數字' in captcha_text_div_text and '文字' in captcha_text_div_text and '括' in captcha_text_div_text:
        temp_answer = normalize_chinese_numeric(temp_answer)

JavaScript 腳本:

// Framework: JavaScript

/**
 * Converts every recognisable numeral character in `keyword` to an Arabic digit.
 *
 * Each character is looked up on its own through `chineseNumericToInt`;
 * characters for which that lookup returns `null` are dropped.
 *
 * @param {string} keyword - Text to convert, iterated character by character.
 * @returns {string} The decimal digits that were found, in input order.
 */
function normalizeChineseNumeric(keyword) {
    const digits = [];
    for (const symbol of keyword) {
        const value = chineseNumericToInt(symbol);
        if (value === null) {
            // Not a known numeral: skip it instead of failing.
            continue;
        }
        digits.push(String(value));
    }
    return digits.join("");
}

/**
 * Looks up the digit that `character` stands for.
 *
 * `character` is lower-cased and compared with strict equality against every
 * alias listed by `getChineseNumeric`; the first digit whose alias list
 * contains it wins.
 *
 * @param {string} character - Text to look up (callers in this file pass one character).
 * @returns {number|null} The matching digit, or `null` when no alias matches.
 */
function chineseNumericToInt(character) {
    const needle = character.toLowerCase();
    const aliasTable = getChineseNumeric();
    for (const digitText of Object.keys(aliasTable)) {
        const isAlias = aliasTable[digitText].some((alias) => alias === needle);
        if (isAlias) {
            return Number.parseInt(digitText, 10);
        }
    }
    return null;
}

/**
 * Builds the alias table used to recognise digits.
 *
 * @returns {Object<string, string[]>} A new object on every call. Keys are the
 *   digit strings '0'..'9'; each value lists the spellings accepted for that digit.
 */
function getChineseNumeric() {
    return {
        '0': ['0', '0', 'zero', '零'],
        '1': ['1', '1', 'one', '一', '壹', '①', '❶', '⑴'],
        '2': ['2', '2', 'two', '二', '貳', '②', '❷', '⑵'],
        '3': ['3', '3', 'three', '三', '叁', '③', '❸', '⑶'],
        '4': ['4', '4', 'four', '四', '肆', '④', '❹', '⑷'],
        '5': ['5', '5', 'five', '五', '伍', '⑤', '❺', '⑸'],
        '6': ['6', '6', 'six', '六', '陸', '⑥', '❻', '⑹'],
        '7': ['7', '7', 'seven', '七', '柒', '⑦', '❼', '⑺'],
        '8': ['8', '8', 'eight', '八', '捌', '⑧', '❽', '⑻'],
        '9': ['9', '9', 'nine', '九', '玖', '⑨', '❾', '⑼'],
    };
}

/**
 * Normalises every supported bracket/quote character to 【 and 】.
 *
 * Every occurrence is converted. The previous implementation called
 * `String.prototype.replace` with string patterns, which only replaces the
 * first match of each pattern, so repeated brackets were left untouched.
 *
 * @param {string} formattedHtmlText - Text whose brackets should be unified.
 * @returns {string} A new string with the bracket characters replaced.
 */
function formatQuotaString(formattedHtmlText) {
    // NOTE(review): the original replacement list named '[' / '(' (and
    // ']' / ')') twice; possibly full-width variants were intended - confirm.
    const openingMarks = '「『〔﹝〈《[〖(';
    const closingMarks = '」』〕﹞〉》]〗)';

    // Map character by character so that all occurrences are handled in one pass.
    return Array.from(formattedHtmlText, (character) => {
        if (openingMarks.includes(character)) {
            return '【';
        }
        if (closingMarks.includes(character)) {
            return '】';
        }
        return character;
    }).join('');
}

/**
 * Returns the text of `string` located between `startDelimiter` and
 * `endDelimiter`.
 *
 * The search for `endDelimiter` begins right after the first occurrence of
 * `startDelimiter`. `indexOf` signals "not found" with -1 instead of throwing,
 * so both lookups are checked explicitly; without the checks a missing
 * delimiter produced an unrelated slice of the input.
 *
 * @param {string} string - Text to search in.
 * @param {string} startDelimiter - Opening delimiter.
 * @param {string} endDelimiter - Closing delimiter.
 * @returns {string} The enclosed substring, or "" when either delimiter is
 *   missing or an argument is not a string.
 */
function findBetween(string, startDelimiter, endDelimiter) {
    // Keep the best-effort contract: bad input yields "" rather than an exception.
    const allStrings = [string, startDelimiter, endDelimiter].every(
        (value) => typeof value === 'string'
    );
    if (!allStrings) {
        return "";
    }

    const delimiterIndex = string.indexOf(startDelimiter);
    if (delimiterIndex === -1) {
        return "";
    }

    const startIndex = delimiterIndex + startDelimiter.length;
    const endIndex = string.indexOf(endDelimiter, startIndex);
    if (endIndex === -1) {
        return "";
    }

    return string.substring(startIndex, endIndex);
}

// Demo: pull the bracketed part out of a captcha prompt and turn it into digits.
const captchaTextDivText = "請將括弧內文字轉換為阿拉伯數字(一二三四五六)";
// Unify every bracket style to 【 】 so a single delimiter pair can be searched for.
const formattedHtmlText = formatQuotaString(captchaTextDivText);

let temporaryAnswer = findBetween(formattedHtmlText, "【", "】");
temporaryAnswer = temporaryAnswer.trim();
if (temporaryAnswer.length > 0) {
    // A string pattern would remove only the first space; the global regex
    // removes every space, matching the Python version of this script.
    temporaryAnswer = temporaryAnswer.replace(/ /g, '');

    // Check the raw question: convert only when it mentions digits ('數字'),
    // text ('文字') and brackets ('括').
    if (captchaTextDivText.includes('數字') && captchaTextDivText.includes('文字') && captchaTextDivText.includes('括')) {
        temporaryAnswer = normalizeChineseNumeric(temporaryAnswer);
    }
}

Python 與 JavaScript 互相轉換:
https://products.codeporting.app/zh/convert/ai/python-to-js/