AlanQuatermain/gist:2219930

## gistfile1.m
/**
 * One-time class setup: fills the rule tables (declared outside this method)
 * that describe character classes and line-break behaviour.
 *
 *  - kMatchingRules[n]         answers whether the character at an index of an
 *                              attributed string matches character class n.
 *  - kRulesets[ruleset][n]     holds the additional break possibilities a ruleset
 *                              grants; each entry takes the characters before and
 *                              after the break.
 *  - kSpecialBreakingRules[n]  returns NO to veto a break at the prospective break
 *                              point, YES when the rule has no objection.
 *
 * Every special rule reads the character (or attributes) at
 * prospectiveBreakPoint-1, so callers must pass a break point of at least 1.
 */
+ (void) initialize
{
    // +initialize is also sent on behalf of subclasses; only fill the tables for this class
    if ( self != [KHLineBreaker class] )
        return;

    // YES when the character at `index` is a member of `charset`
    typedef BOOL (^charsetClassMatchingRule)(NSCharacterSet * charset, NSAttributedString * attrStr, NSUInteger index);
    charsetClassMatchingRule inCharset = ^(NSCharacterSet * charset, NSAttributedString * attrStr, NSUInteger index) {
        return ( [charset characterIsMember: [[attrStr string] characterAtIndex: index]] );
    };

    // YES when the character at `index`, or the one following it, is set super- or subscript
    BOOL (^isOrnamentedCharacterComplex)(NSAttributedString*, NSUInteger) = ^BOOL(NSAttributedString * attrStr, NSUInteger index) {
        NSDictionary * attrsOn = [attrStr attributesAtIndex: index effectiveRange: NULL];
        if ( [[attrsOn objectForKey: NSSuperscriptAttributeName] intValue] != 0 )
            return ( YES );

        // the last character has no follower, and asking for attributes past the end raises
        if ( index+1 >= [attrStr length] )
            return ( NO );

        NSDictionary * attrsNext = [attrStr attributesAtIndex: index+1 effectiveRange: NULL];
        return ( [[attrsNext objectForKey: NSSuperscriptAttributeName] intValue] != 0 );
    };

    // YES when attribute `name` is present at `index`; when `value` is non-nil it must also be equal to it
    typedef BOOL (^attributeMatchingRule)(NSString * name, id value, NSAttributedString * attrStr, NSUInteger index);
    attributeMatchingRule hasAttribute = ^BOOL(NSString * name, id value, NSAttributedString * attrStr, NSUInteger index) {
        id found = [[attrStr attributesAtIndex: index effectiveRange: NULL] objectForKey: name];
        if ( value == nil )
            return ( found != nil );
        return ( [found isEqual: value] );
    };

    ///////////////////////////////////////////////////////////////////////////////
    // setup character class detection
    kMatchingRules[0]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self openingBrackets], aStr, idx); };
    kMatchingRules[1]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self closingBrackets], aStr, idx); };
    kMatchingRules[2]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self hyphens], aStr, idx); };
    kMatchingRules[3]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self dividingPunctuation], aStr, idx); };
    kMatchingRules[4]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self middleDots], aStr, idx); };
    kMatchingRules[5]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self fullStops], aStr, idx); };
    kMatchingRules[6]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self commas], aStr, idx); };
    kMatchingRules[7]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self inseparableCharacters], aStr, idx); };
    kMatchingRules[8]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self iterationMarks], aStr, idx); };
    kMatchingRules[9]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self prolongedSoundMark], aStr, idx); };
    kMatchingRules[10] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self smallKana], aStr, idx); };
    kMatchingRules[11] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self prefixedAbbreviations], aStr, idx); };
    kMatchingRules[12] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self postfixedAbbreviations], aStr, idx); };
    kMatchingRules[13] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self fullWidthIdeographicSpace], aStr, idx); };
    kMatchingRules[14] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self hiragana], aStr, idx); };
    kMatchingRules[15] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self katakana], aStr, idx); };
    kMatchingRules[16] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self mathSymbols], aStr, idx); };
    kMatchingRules[17] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self mathOperators], aStr, idx); };
    kMatchingRules[18] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self ideographicCharacters], aStr, idx); };
    kMatchingRules[19] = ^(NSAttributedString * aStr, NSUInteger idx){ return hasAttribute(KHReferenceMarkAttributeName, @YES, aStr, idx); };     // reference marks
    kMatchingRules[20] = ^(NSAttributedString * aStr, NSUInteger idx){ return isOrnamentedCharacterComplex(aStr, idx); };                          // ornamented character complexes
    kMatchingRules[21] = ^(NSAttributedString * aStr, NSUInteger idx){ return hasAttribute(KHRubyCharactersAttributeName, nil, aStr, idx); };      // simple-ruby character complexes
    kMatchingRules[22] = ^(NSAttributedString * aStr, NSUInteger idx){ return hasAttribute(KHRubyCharactersAttributeName, nil, aStr, idx); };      // jukugo-ruby character complexes
    kMatchingRules[23] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self groupedNumerals], aStr, idx); };
    kMatchingRules[24] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self unitSymbols], aStr, idx); };
    kMatchingRules[25] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self westernWordSpace], aStr, idx); };
    kMatchingRules[26] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self westernCharacters], aStr, idx); };

    // Warichu brackets only count when the following character carries the inline-cutting-note
    // attribute. A bracket that ends the string has no following character, so it cannot match
    // (and looking up attributes past the end would raise).
    // NOTE(review): the closing-bracket rule inspects idx+1 exactly like the opening-bracket rule;
    // presumably the note text precedes a closing bracket -- confirm whether idx-1 was intended.
    kMatchingRules[27] = ^BOOL(NSAttributedString * aStr, NSUInteger idx){ return idx+1 < [aStr length] && inCharset([self warichuOpeningBrackets], aStr, idx) && hasAttribute(KHInlineCuttingNoteAttributeName, @YES, aStr, idx+1); };
    kMatchingRules[28] = ^BOOL(NSAttributedString * aStr, NSUInteger idx){ return idx+1 < [aStr length] && inCharset([self warichuClosingBrackets], aStr, idx) && hasAttribute(KHInlineCuttingNoteAttributeName, @YES, aStr, idx+1); };
    kMatchingRules[29] = ^(NSAttributedString * aStr, NSUInteger idx){ return hasAttribute(KHUseHorizontalInVerticalAttributeName, @YES, aStr, idx); };   // tate-chu-yoko

    /////////////////////////////////////////////////////////////////////
    // Setup rulesets
    // Each helper receives the characters before (be) and after (af) the prospective break.
    BOOL (^oneCharInSet)(unichar, unichar, NSCharacterSet*) = ^BOOL(unichar be, unichar af, NSCharacterSet * set) {
        return ( [set characterIsMember: be] || [set characterIsMember: af] );
    };
    BOOL (^oneIsMiddleDot)(unichar, unichar) = ^BOOL(unichar be, unichar af) {
        return ( be == 0x30FB || af == 0x30FB );
    };
    BOOL (^twoIdenticalEllipses)(unichar, unichar) = ^BOOL(unichar be, unichar af) {
        return ( (be == 0x2026 && af == 0x2026) || (be == 0x2025 && af == 0x2025) );
    };
    BOOL (^oneIsIdeographicIterationMark)(unichar, unichar) = ^BOOL(unichar be, unichar af) {
        return ( be == 0x3005 || af == 0x3005 || be == 0x303B || af == 0x303B );
    };
    BOOL (^oneIsPercentSign)(unichar, unichar) = ^BOOL(unichar be, unichar af) {
        return ( be == (unichar)'%' || af == (unichar)'%' );
    };

    kRulesets[KHLineBreakRulesVeryLoose][3] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self hyphens]); };
    kRulesets[KHLineBreakRulesVeryLoose][4] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self dividingPunctuation]); };
    kRulesets[KHLineBreakRulesVeryLoose][5] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self middleDots]); };
    kRulesets[KHLineBreakRulesVeryLoose][8] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self inseparableCharacters]); };
    kRulesets[KHLineBreakRulesVeryLoose][9] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self iterationMarks]); };
    kRulesets[KHLineBreakRulesVeryLoose][10] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self prolongedSoundMark]); };
    kRulesets[KHLineBreakRulesVeryLoose][11] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self smallKana]); };
    kRulesets[KHLineBreakRulesVeryLoose][12] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self prefixedAbbreviations]); };
    kRulesets[KHLineBreakRulesVeryLoose][13] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self postfixedAbbreviations]); };

    kRulesets[KHLineBreakRulesLoose][3] = kRulesets[KHLineBreakRulesVeryLoose][3];
    kRulesets[KHLineBreakRulesLoose][5] = oneIsMiddleDot;
    kRulesets[KHLineBreakRulesLoose][8] = twoIdenticalEllipses;
    kRulesets[KHLineBreakRulesLoose][9] = oneIsIdeographicIterationMark;
    kRulesets[KHLineBreakRulesLoose][10] = kRulesets[KHLineBreakRulesVeryLoose][10];
    kRulesets[KHLineBreakRulesLoose][11] = kRulesets[KHLineBreakRulesVeryLoose][11];
    kRulesets[KHLineBreakRulesLoose][13] = oneIsPercentSign;
    kRulesets[KHLineBreakRulesLoose][25] = oneIsMiddleDot;
    kRulesets[KHLineBreakRulesLoose][27] = oneIsPercentSign;

    kRulesets[KHLineBreakRulesStrict][9] = oneIsIdeographicIterationMark;
    kRulesets[KHLineBreakRulesStrict][10] = kRulesets[KHLineBreakRulesVeryLoose][10];
    kRulesets[KHLineBreakRulesStrict][11] = kRulesets[KHLineBreakRulesVeryLoose][11];

    // KHLineBreakRulesVeryStrict doesn't have any additional break possibilities

    /////////////////////////////////////////////////////////////////////
    // TODO: setup special rule handler blocks

    // no line break opportunity between certain pairs of characters within the inseparable character class
    kSpecialBreakingRules[5] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakPoint, KHLineBreakRuleset ruleset) {
        unichar be = [[attrStr string] characterAtIndex: prospectiveBreakPoint-1];
        unichar af = [[attrStr string] characterAtIndex: prospectiveBreakPoint];
        NSCharacterSet * set = [self inseparableCharacters];
        if ( [set characterIsMember: be] && [set characterIsMember: af] )
        {
            if ( be == 0x2025 && af == 0x2025 )         // two two-dot leaders
                return ( NO );
            else if ( be == 0x2026 && af == 0x2026 )    // two ellipses
                return ( NO );
            else if ( be == 0x2014 && af == 0x2014 )    // two em-dashes
                return ( NO );
            else if ( be == 0x3033 && af == 0x3035 )    // vertical kana repeat mark upper (U+3033) & lower (U+3035) halves
                return ( NO );
            else if ( be == 0x3034 && af == 0x3035 )    // voiced vertical kana repeat mark upper (U+3034) & lower (U+3035) halves
                return ( NO );
        }

        return ( YES );
    };

    // no line break opportunity between characters within the same ornamented character complex
    kSpecialBreakingRules[6] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakPoint, KHLineBreakRuleset ruleset) {
        // we know it's a match for the class already -- just get the appropriate attribute range
        NSRange r = {NSNotFound, 0};
        NSDictionary * attrs = [attrStr attributesAtIndex: prospectiveBreakPoint-1 effectiveRange: &r];
        if ( [[attrs objectForKey: NSSuperscriptAttributeName] intValue] == 0 )
        {
            // prior character is start of ornamented complex (the base character), so no break
            return ( NO );
        }
        else if ( NSMaxRange(r) > prospectiveBreakPoint )
        {
            // the same ornament text extends beyond prospective break point, so no break
            return ( NO );
        }

        // otherwise, it's OK to break here
        return ( YES );
    };

    // no line break opportunity within a single run of Ruby characters
    kSpecialBreakingRules[7] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakPoint, KHLineBreakRuleset ruleset) {
        // we know preceding char has ruby text attached -- how far does its attribute run extend?
        // NOTE(review): this is the range over which the *whole* attribute dictionary is unchanged,
        // and it is not guaranteed to be the longest such range; querying the ruby attribute with
        // -attribute:atIndex:longestEffectiveRange:inRange: would be the precise test -- confirm.
        NSRange r = {NSNotFound, 0};
        (void) [attrStr attributesAtIndex: prospectiveBreakPoint-1 effectiveRange: &r];
        if ( NSMaxRange(r) > prospectiveBreakPoint )
        {
            // current ruby complex extends across this prospective line break
            return ( NO );
        }

        // otherwise, the current ruby complex ends at the preceding character, so we can break here
        return ( YES );
    };

    // A simplistic interpretation of the following:
    /* A line break opportunity exists between two consecutive base characters belonging to different jukugo-ruby character complexes (cl-23). There is also a line break opportunity between two consecutive base characters belonging to the same jukugo-ruby character complex (cl-23) and between two runs of ruby text accompanying the corresponding base characters. However, a base character and the accompanying ruby text shall be indivisible, hence there is no line break opportunity between any two consecutive ruby characters in a run of ruby text accompanying a base character.
     */
    kSpecialBreakingRules[8] = kSpecialBreakingRules[7];

    // No line breaks between grouped numerals and postfixed abbreviations.
    // This is overridden by some rulesets.
    kSpecialBreakingRules[9] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakPoint, KHLineBreakRuleset ruleset) {
        unichar be = [[attrStr string] characterAtIndex: prospectiveBreakPoint-1];
        unichar af = [[attrStr string] characterAtIndex: prospectiveBreakPoint];
        if ( [[self groupedNumerals] characterIsMember: be] && [[self postfixedAbbreviations] characterIsMember: af] )
        {
            // check with the ruleset
            rulesetAddition rule = kRulesets[ruleset][KHCharacterClassPostfixedAbbreviations];
            if ( rule == nil )
                return ( NO );      // default is not to allow a break here

            // otherwise, we let the rule decide
            return ( rule(be, af) );
        }

        // otherwise, break is OK by this rule
        return ( YES );
    };

    // Whether to allow line breaks between grouped numerals and trailing western characters. Could go either way.
    kSpecialBreakingRules[10] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakpoint, KHLineBreakRuleset ruleset) {
        unichar be = [[attrStr string] characterAtIndex: prospectiveBreakpoint-1];
        unichar af = [[attrStr string] characterAtIndex: prospectiveBreakpoint];

        if ( [[self groupedNumerals] characterIsMember: be] && [[self westernCharacters] characterIsMember: af] )
            return ( allowBreaksBetweenNumeralsAndTrailingWesternCharacters );      // global setting to choose desired behaviour

        // otherwise, we don't prohibit it by this rule
        return ( YES );
    };

    // not currently implemented (slot 11 is left empty for it):
    /* A line break opportunity generally exists between preceding Western characters (cl-27) and trailing postfixed abbreviations (cl-13), unless the preceding Western character (cl-27) is used as a symbol of a quantity or a European numeral, in which case a line break is not allowed between them.
     */

    // Western characters can only be broken at valid hyphenation points, and only by inserting a hyphen
    kSpecialBreakingRules[12] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakpoint, KHLineBreakRuleset ruleset) {
        NSString * text = [attrStr string];
        unichar be = [text characterAtIndex: prospectiveBreakpoint-1];
        unichar af = [text characterAtIndex: prospectiveBreakpoint];

        if ( [[self westernCharacters] characterIsMember: be] && [[self westernCharacters] characterIsMember: af] )
        {
            // find the range of the complete word: from the nearest whitespace before the
            // break point up to the nearest whitespace at or after it
            NSRange upTo = NSMakeRange(0, prospectiveBreakpoint);
            NSRange from = NSMakeRange(prospectiveBreakpoint, [text length]-prospectiveBreakpoint);
            NSCharacterSet * set = [NSCharacterSet whitespaceAndNewlineCharacterSet];

            NSRange spaceBefore = [text rangeOfCharacterFromSet: set options: NSBackwardsSearch range: upTo];
            NSRange spaceAfter = [text rangeOfCharacterFromSet: set options: 0 range: from];

            // no whitespace on one side means the word runs to that end of the string;
            // doing arithmetic on NSNotFound would produce a bogus range
            NSUInteger wordStart = ( spaceBefore.location == NSNotFound ? 0 : spaceBefore.location+1 );
            NSUInteger wordEnd = ( spaceAfter.location == NSNotFound ? [text length] : spaceAfter.location );
            NSRange wordRange = NSMakeRange(wordStart, wordEnd - wordStart);

            // try to determine the language of this word
            static NSLinguisticTagger * __tagger = nil;
            static dispatch_once_t onceToken;
            dispatch_once(&onceToken, ^{
                __tagger = [[NSLinguisticTagger alloc] initWithTagSchemes: @[ NSLinguisticTagSchemeLanguage ] options: 0];
            });

            CFLocaleRef locale = NULL;
            // the tagger is shared, so setting its string and reading the tag must not interleave
            @synchronized(__tagger)
            {
                [__tagger setString: [text substringWithRange: wordRange]];
                NSString * language = [__tagger tagAtIndex: 0 scheme: NSLinguisticTagSchemeLanguage tokenRange: NULL sentenceRange: NULL];
                if ( language != nil )
                {
                    CFStringRef localeID = CFLocaleCreateCanonicalLanguageIdentifierFromString(kCFAllocatorDefault, (__bridge CFStringRef)language);
                    if ( localeID != NULL )
                    {
                        locale = CFLocaleCreate(kCFAllocatorDefault, localeID);
                        CFRelease(localeID);
                    }
                }
            }

            // fall back to English when no language (or no locale for it) could be determined
            if ( locale == NULL )
                locale = CFLocaleCreate(kCFAllocatorDefault, CFSTR("en"));

            // ask for the last hyphenation point located before breakpoint+1, i.e. at or before the breakpoint
            CFIndex idx = kCFNotFound;
            if ( locale != NULL )
            {
                idx = CFStringGetHyphenationLocationBeforeIndex((__bridge CFStringRef)text, prospectiveBreakpoint+1, CFRangeMake(wordRange.location, wordRange.length), 0, locale, NULL);
                CFRelease(locale);      // CFRelease must never be handed NULL
            }

            // if the given breakpoint isn't a good place for hyphenation, then we can't break here
            if ( idx < 0 || (NSUInteger)idx != prospectiveBreakpoint )
                return ( NO );
        }

        // no prohibition on line breaks according to this rule
        return ( YES );
    };

    // No line break opportunity exists between characters within the same range of tate-chu-yoko (horizontal-in-vertical)
    // NOTE(review): this rule used to be stored in slot 12 as well, silently replacing the Western
    // hyphenation rule above. Slot 13 follows the numbering of the preceding rules -- confirm that
    // kSpecialBreakingRules has room for it and that the caller dispatches on 13.
    kSpecialBreakingRules[13] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakpoint, KHLineBreakRuleset ruleset) {
        if ( [self characterAtIndex: prospectiveBreakpoint-1 ofString: attrStr isMemberOfClass: KHCharacterClassCharactersInTateChuYoko] )
        {
            NSRange r = {NSNotFound, 0};
            (void) [attrStr attributesAtIndex: prospectiveBreakpoint-1 effectiveRange: &r];
            if ( NSMaxRange(r) > prospectiveBreakpoint )
                return ( NO );
        }

        // no prohibition under this rule
        return ( YES );
    };
}
	/**
	 * One-time class setup: fills the rule tables (declared outside this method)
	 * that describe character classes and line-break behaviour.
	 *
	 *  - kMatchingRules[n]         answers whether the character at an index of an
	 *                              attributed string matches character class n.
	 *  - kRulesets[ruleset][n]     holds the additional break possibilities a ruleset
	 *                              grants; each entry takes the characters before and
	 *                              after the break.
	 *  - kSpecialBreakingRules[n]  returns NO to veto a break at the prospective break
	 *                              point, YES when the rule has no objection.
	 *
	 * Every special rule reads the character (or attributes) at
	 * prospectiveBreakPoint-1, so callers must pass a break point of at least 1.
	 */
	+ (void) initialize
	{
	    // +initialize is also sent on behalf of subclasses; only fill the tables for this class
	    if ( self != [KHLineBreaker class] )
	        return;

	    // YES when the character at `index` is a member of `charset`
	    typedef BOOL (^charsetClassMatchingRule)(NSCharacterSet * charset, NSAttributedString * attrStr, NSUInteger index);
	    charsetClassMatchingRule inCharset = ^(NSCharacterSet * charset, NSAttributedString * attrStr, NSUInteger index) {
	        return ( [charset characterIsMember: [[attrStr string] characterAtIndex: index]] );
	    };

	    // YES when the character at `index`, or the one following it, is set super- or subscript
	    BOOL (^isOrnamentedCharacterComplex)(NSAttributedString*, NSUInteger) = ^BOOL(NSAttributedString * attrStr, NSUInteger index) {
	        NSDictionary * attrsOn = [attrStr attributesAtIndex: index effectiveRange: NULL];
	        if ( [[attrsOn objectForKey: NSSuperscriptAttributeName] intValue] != 0 )
	            return ( YES );

	        // the last character has no follower, and asking for attributes past the end raises
	        if ( index+1 >= [attrStr length] )
	            return ( NO );

	        NSDictionary * attrsNext = [attrStr attributesAtIndex: index+1 effectiveRange: NULL];
	        return ( [[attrsNext objectForKey: NSSuperscriptAttributeName] intValue] != 0 );
	    };

	    // YES when attribute `name` is present at `index`; when `value` is non-nil it must also be equal to it
	    typedef BOOL (^attributeMatchingRule)(NSString * name, id value, NSAttributedString * attrStr, NSUInteger index);
	    attributeMatchingRule hasAttribute = ^BOOL(NSString * name, id value, NSAttributedString * attrStr, NSUInteger index) {
	        id found = [[attrStr attributesAtIndex: index effectiveRange: NULL] objectForKey: name];
	        if ( value == nil )
	            return ( found != nil );
	        return ( [found isEqual: value] );
	    };

	    ///////////////////////////////////////////////////////////////////////////////
	    // setup character class detection
	    kMatchingRules[0]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self openingBrackets], aStr, idx); };
	    kMatchingRules[1]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self closingBrackets], aStr, idx); };
	    kMatchingRules[2]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self hyphens], aStr, idx); };
	    kMatchingRules[3]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self dividingPunctuation], aStr, idx); };
	    kMatchingRules[4]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self middleDots], aStr, idx); };
	    kMatchingRules[5]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self fullStops], aStr, idx); };
	    kMatchingRules[6]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self commas], aStr, idx); };
	    kMatchingRules[7]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self inseparableCharacters], aStr, idx); };
	    kMatchingRules[8]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self iterationMarks], aStr, idx); };
	    kMatchingRules[9]  = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self prolongedSoundMark], aStr, idx); };
	    kMatchingRules[10] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self smallKana], aStr, idx); };
	    kMatchingRules[11] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self prefixedAbbreviations], aStr, idx); };
	    kMatchingRules[12] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self postfixedAbbreviations], aStr, idx); };
	    kMatchingRules[13] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self fullWidthIdeographicSpace], aStr, idx); };
	    kMatchingRules[14] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self hiragana], aStr, idx); };
	    kMatchingRules[15] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self katakana], aStr, idx); };
	    kMatchingRules[16] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self mathSymbols], aStr, idx); };
	    kMatchingRules[17] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self mathOperators], aStr, idx); };
	    kMatchingRules[18] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self ideographicCharacters], aStr, idx); };
	    kMatchingRules[19] = ^(NSAttributedString * aStr, NSUInteger idx){ return hasAttribute(KHReferenceMarkAttributeName, @YES, aStr, idx); };     // reference marks
	    kMatchingRules[20] = ^(NSAttributedString * aStr, NSUInteger idx){ return isOrnamentedCharacterComplex(aStr, idx); };                          // ornamented character complexes
	    kMatchingRules[21] = ^(NSAttributedString * aStr, NSUInteger idx){ return hasAttribute(KHRubyCharactersAttributeName, nil, aStr, idx); };      // simple-ruby character complexes
	    kMatchingRules[22] = ^(NSAttributedString * aStr, NSUInteger idx){ return hasAttribute(KHRubyCharactersAttributeName, nil, aStr, idx); };      // jukugo-ruby character complexes
	    kMatchingRules[23] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self groupedNumerals], aStr, idx); };
	    kMatchingRules[24] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self unitSymbols], aStr, idx); };
	    kMatchingRules[25] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self westernWordSpace], aStr, idx); };
	    kMatchingRules[26] = ^(NSAttributedString * aStr, NSUInteger idx){ return inCharset([self westernCharacters], aStr, idx); };

	    // Warichu brackets only count when the following character carries the inline-cutting-note
	    // attribute. A bracket that ends the string has no following character, so it cannot match
	    // (and looking up attributes past the end would raise).
	    // NOTE(review): the closing-bracket rule inspects idx+1 exactly like the opening-bracket rule;
	    // presumably the note text precedes a closing bracket -- confirm whether idx-1 was intended.
	    kMatchingRules[27] = ^BOOL(NSAttributedString * aStr, NSUInteger idx){ return idx+1 < [aStr length] && inCharset([self warichuOpeningBrackets], aStr, idx) && hasAttribute(KHInlineCuttingNoteAttributeName, @YES, aStr, idx+1); };
	    kMatchingRules[28] = ^BOOL(NSAttributedString * aStr, NSUInteger idx){ return idx+1 < [aStr length] && inCharset([self warichuClosingBrackets], aStr, idx) && hasAttribute(KHInlineCuttingNoteAttributeName, @YES, aStr, idx+1); };
	    kMatchingRules[29] = ^(NSAttributedString * aStr, NSUInteger idx){ return hasAttribute(KHUseHorizontalInVerticalAttributeName, @YES, aStr, idx); };   // tate-chu-yoko

	    /////////////////////////////////////////////////////////////////////
	    // Setup rulesets
	    // Each helper receives the characters before (be) and after (af) the prospective break.
	    BOOL (^oneCharInSet)(unichar, unichar, NSCharacterSet*) = ^BOOL(unichar be, unichar af, NSCharacterSet * set) {
	        return ( [set characterIsMember: be] || [set characterIsMember: af] );
	    };
	    BOOL (^oneIsMiddleDot)(unichar, unichar) = ^BOOL(unichar be, unichar af) {
	        return ( be == 0x30FB || af == 0x30FB );
	    };
	    BOOL (^twoIdenticalEllipses)(unichar, unichar) = ^BOOL(unichar be, unichar af) {
	        return ( (be == 0x2026 && af == 0x2026) || (be == 0x2025 && af == 0x2025) );
	    };
	    BOOL (^oneIsIdeographicIterationMark)(unichar, unichar) = ^BOOL(unichar be, unichar af) {
	        return ( be == 0x3005 || af == 0x3005 || be == 0x303B || af == 0x303B );
	    };
	    BOOL (^oneIsPercentSign)(unichar, unichar) = ^BOOL(unichar be, unichar af) {
	        return ( be == (unichar)'%' || af == (unichar)'%' );
	    };

	    kRulesets[KHLineBreakRulesVeryLoose][3] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self hyphens]); };
	    kRulesets[KHLineBreakRulesVeryLoose][4] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self dividingPunctuation]); };
	    kRulesets[KHLineBreakRulesVeryLoose][5] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self middleDots]); };
	    kRulesets[KHLineBreakRulesVeryLoose][8] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self inseparableCharacters]); };
	    kRulesets[KHLineBreakRulesVeryLoose][9] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self iterationMarks]); };
	    kRulesets[KHLineBreakRulesVeryLoose][10] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self prolongedSoundMark]); };
	    kRulesets[KHLineBreakRulesVeryLoose][11] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self smallKana]); };
	    kRulesets[KHLineBreakRulesVeryLoose][12] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self prefixedAbbreviations]); };
	    kRulesets[KHLineBreakRulesVeryLoose][13] = ^(unichar be, unichar af) { return oneCharInSet(be, af, [self postfixedAbbreviations]); };

	    kRulesets[KHLineBreakRulesLoose][3] = kRulesets[KHLineBreakRulesVeryLoose][3];
	    kRulesets[KHLineBreakRulesLoose][5] = oneIsMiddleDot;
	    kRulesets[KHLineBreakRulesLoose][8] = twoIdenticalEllipses;
	    kRulesets[KHLineBreakRulesLoose][9] = oneIsIdeographicIterationMark;
	    kRulesets[KHLineBreakRulesLoose][10] = kRulesets[KHLineBreakRulesVeryLoose][10];
	    kRulesets[KHLineBreakRulesLoose][11] = kRulesets[KHLineBreakRulesVeryLoose][11];
	    kRulesets[KHLineBreakRulesLoose][13] = oneIsPercentSign;
	    kRulesets[KHLineBreakRulesLoose][25] = oneIsMiddleDot;
	    kRulesets[KHLineBreakRulesLoose][27] = oneIsPercentSign;

	    kRulesets[KHLineBreakRulesStrict][9] = oneIsIdeographicIterationMark;
	    kRulesets[KHLineBreakRulesStrict][10] = kRulesets[KHLineBreakRulesVeryLoose][10];
	    kRulesets[KHLineBreakRulesStrict][11] = kRulesets[KHLineBreakRulesVeryLoose][11];

	    // KHLineBreakRulesVeryStrict doesn't have any additional break possibilities

	    /////////////////////////////////////////////////////////////////////
	    // TODO: setup special rule handler blocks

	    // no line break opportunity between certain pairs of characters within the inseparable character class
	    kSpecialBreakingRules[5] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakPoint, KHLineBreakRuleset ruleset) {
	        unichar be = [[attrStr string] characterAtIndex: prospectiveBreakPoint-1];
	        unichar af = [[attrStr string] characterAtIndex: prospectiveBreakPoint];
	        NSCharacterSet * set = [self inseparableCharacters];
	        if ( [set characterIsMember: be] && [set characterIsMember: af] )
	        {
	            if ( be == 0x2025 && af == 0x2025 )         // two two-dot leaders
	                return ( NO );
	            else if ( be == 0x2026 && af == 0x2026 )    // two ellipses
	                return ( NO );
	            else if ( be == 0x2014 && af == 0x2014 )    // two em-dashes
	                return ( NO );
	            else if ( be == 0x3033 && af == 0x3035 )    // vertical kana repeat mark upper (U+3033) & lower (U+3035) halves
	                return ( NO );
	            else if ( be == 0x3034 && af == 0x3035 )    // voiced vertical kana repeat mark upper (U+3034) & lower (U+3035) halves
	                return ( NO );
	        }

	        return ( YES );
	    };

	    // no line break opportunity between characters within the same ornamented character complex
	    kSpecialBreakingRules[6] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakPoint, KHLineBreakRuleset ruleset) {
	        // we know it's a match for the class already -- just get the appropriate attribute range
	        NSRange r = {NSNotFound, 0};
	        NSDictionary * attrs = [attrStr attributesAtIndex: prospectiveBreakPoint-1 effectiveRange: &r];
	        if ( [[attrs objectForKey: NSSuperscriptAttributeName] intValue] == 0 )
	        {
	            // prior character is start of ornamented complex (the base character), so no break
	            return ( NO );
	        }
	        else if ( NSMaxRange(r) > prospectiveBreakPoint )
	        {
	            // the same ornament text extends beyond prospective break point, so no break
	            return ( NO );
	        }

	        // otherwise, it's OK to break here
	        return ( YES );
	    };

	    // no line break opportunity within a single run of Ruby characters
	    kSpecialBreakingRules[7] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakPoint, KHLineBreakRuleset ruleset) {
	        // we know preceding char has ruby text attached -- how far does its attribute run extend?
	        // NOTE(review): this is the range over which the *whole* attribute dictionary is unchanged,
	        // and it is not guaranteed to be the longest such range; querying the ruby attribute with
	        // -attribute:atIndex:longestEffectiveRange:inRange: would be the precise test -- confirm.
	        NSRange r = {NSNotFound, 0};
	        (void) [attrStr attributesAtIndex: prospectiveBreakPoint-1 effectiveRange: &r];
	        if ( NSMaxRange(r) > prospectiveBreakPoint )
	        {
	            // current ruby complex extends across this prospective line break
	            return ( NO );
	        }

	        // otherwise, the current ruby complex ends at the preceding character, so we can break here
	        return ( YES );
	    };

	    // A simplistic interpretation of the following:
	    /* A line break opportunity exists between two consecutive base characters belonging to different jukugo-ruby character complexes (cl-23). There is also a line break opportunity between two consecutive base characters belonging to the same jukugo-ruby character complex (cl-23) and between two runs of ruby text accompanying the corresponding base characters. However, a base character and the accompanying ruby text shall be indivisible, hence there is no line break opportunity between any two consecutive ruby characters in a run of ruby text accompanying a base character.
	     */
	    kSpecialBreakingRules[8] = kSpecialBreakingRules[7];

	    // No line breaks between grouped numerals and postfixed abbreviations.
	    // This is overridden by some rulesets.
	    kSpecialBreakingRules[9] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakPoint, KHLineBreakRuleset ruleset) {
	        unichar be = [[attrStr string] characterAtIndex: prospectiveBreakPoint-1];
	        unichar af = [[attrStr string] characterAtIndex: prospectiveBreakPoint];
	        if ( [[self groupedNumerals] characterIsMember: be] && [[self postfixedAbbreviations] characterIsMember: af] )
	        {
	            // check with the ruleset
	            rulesetAddition rule = kRulesets[ruleset][KHCharacterClassPostfixedAbbreviations];
	            if ( rule == nil )
	                return ( NO );      // default is not to allow a break here

	            // otherwise, we let the rule decide
	            return ( rule(be, af) );
	        }

	        // otherwise, break is OK by this rule
	        return ( YES );
	    };

	    // Whether to allow line breaks between grouped numerals and trailing western characters. Could go either way.
	    kSpecialBreakingRules[10] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakpoint, KHLineBreakRuleset ruleset) {
	        unichar be = [[attrStr string] characterAtIndex: prospectiveBreakpoint-1];
	        unichar af = [[attrStr string] characterAtIndex: prospectiveBreakpoint];

	        if ( [[self groupedNumerals] characterIsMember: be] && [[self westernCharacters] characterIsMember: af] )
	            return ( allowBreaksBetweenNumeralsAndTrailingWesternCharacters );      // global setting to choose desired behaviour

	        // otherwise, we don't prohibit it by this rule
	        return ( YES );
	    };

	    // not currently implemented (slot 11 is left empty for it):
	    /* A line break opportunity generally exists between preceding Western characters (cl-27) and trailing postfixed abbreviations (cl-13), unless the preceding Western character (cl-27) is used as a symbol of a quantity or a European numeral, in which case a line break is not allowed between them.
	     */

	    // Western characters can only be broken at valid hyphenation points, and only by inserting a hyphen
	    kSpecialBreakingRules[12] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakpoint, KHLineBreakRuleset ruleset) {
	        NSString * text = [attrStr string];
	        unichar be = [text characterAtIndex: prospectiveBreakpoint-1];
	        unichar af = [text characterAtIndex: prospectiveBreakpoint];

	        if ( [[self westernCharacters] characterIsMember: be] && [[self westernCharacters] characterIsMember: af] )
	        {
	            // find the range of the complete word: from the nearest whitespace before the
	            // break point up to the nearest whitespace at or after it
	            NSRange upTo = NSMakeRange(0, prospectiveBreakpoint);
	            NSRange from = NSMakeRange(prospectiveBreakpoint, [text length]-prospectiveBreakpoint);
	            NSCharacterSet * set = [NSCharacterSet whitespaceAndNewlineCharacterSet];

	            NSRange spaceBefore = [text rangeOfCharacterFromSet: set options: NSBackwardsSearch range: upTo];
	            NSRange spaceAfter = [text rangeOfCharacterFromSet: set options: 0 range: from];

	            // no whitespace on one side means the word runs to that end of the string;
	            // doing arithmetic on NSNotFound would produce a bogus range
	            NSUInteger wordStart = ( spaceBefore.location == NSNotFound ? 0 : spaceBefore.location+1 );
	            NSUInteger wordEnd = ( spaceAfter.location == NSNotFound ? [text length] : spaceAfter.location );
	            NSRange wordRange = NSMakeRange(wordStart, wordEnd - wordStart);

	            // try to determine the language of this word
	            static NSLinguisticTagger * __tagger = nil;
	            static dispatch_once_t onceToken;
	            dispatch_once(&onceToken, ^{
	                __tagger = [[NSLinguisticTagger alloc] initWithTagSchemes: @[ NSLinguisticTagSchemeLanguage ] options: 0];
	            });

	            CFLocaleRef locale = NULL;
	            // the tagger is shared, so setting its string and reading the tag must not interleave
	            @synchronized(__tagger)
	            {
	                [__tagger setString: [text substringWithRange: wordRange]];
	                NSString * language = [__tagger tagAtIndex: 0 scheme: NSLinguisticTagSchemeLanguage tokenRange: NULL sentenceRange: NULL];
	                if ( language != nil )
	                {
	                    CFStringRef localeID = CFLocaleCreateCanonicalLanguageIdentifierFromString(kCFAllocatorDefault, (__bridge CFStringRef)language);
	                    if ( localeID != NULL )
	                    {
	                        locale = CFLocaleCreate(kCFAllocatorDefault, localeID);
	                        CFRelease(localeID);
	                    }
	                }
	            }

	            // fall back to English when no language (or no locale for it) could be determined
	            if ( locale == NULL )
	                locale = CFLocaleCreate(kCFAllocatorDefault, CFSTR("en"));

	            // ask for the last hyphenation point located before breakpoint+1, i.e. at or before the breakpoint
	            CFIndex idx = kCFNotFound;
	            if ( locale != NULL )
	            {
	                idx = CFStringGetHyphenationLocationBeforeIndex((__bridge CFStringRef)text, prospectiveBreakpoint+1, CFRangeMake(wordRange.location, wordRange.length), 0, locale, NULL);
	                CFRelease(locale);      // CFRelease must never be handed NULL
	            }

	            // if the given breakpoint isn't a good place for hyphenation, then we can't break here
	            if ( idx < 0 || (NSUInteger)idx != prospectiveBreakpoint )
	                return ( NO );
	        }

	        // no prohibition on line breaks according to this rule
	        return ( YES );
	    };

	    // No line break opportunity exists between characters within the same range of tate-chu-yoko (horizontal-in-vertical)
	    // NOTE(review): this rule used to be stored in slot 12 as well, silently replacing the Western
	    // hyphenation rule above. Slot 13 follows the numbering of the preceding rules -- confirm that
	    // kSpecialBreakingRules has room for it and that the caller dispatches on 13.
	    kSpecialBreakingRules[13] = ^BOOL(NSAttributedString * attrStr, NSUInteger prospectiveBreakpoint, KHLineBreakRuleset ruleset) {
	        if ( [self characterAtIndex: prospectiveBreakpoint-1 ofString: attrStr isMemberOfClass: KHCharacterClassCharactersInTateChuYoko] )
	        {
	            NSRange r = {NSNotFound, 0};
	            (void) [attrStr attributesAtIndex: prospectiveBreakpoint-1 effectiveRange: &r];
	            if ( NSMaxRange(r) > prospectiveBreakpoint )
	                return ( NO );
	        }

	        // no prohibition under this rule
	        return ( YES );
	    };
	}